rbelanec commited on
Commit
c1e6f71
verified
1 Parent(s): 85df2f8

Training in progress, step 38150

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +376 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18e2a7dc2ac44f5d10d075ff4ed0ab3bdc950cef1f9684183790ee5d7b8b7541
3
  size 8388736
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cf0b71c50fa7ea23eaf1cbe7b4cf79c81a2c0b707210a4d66d171a9dc884e4c
3
  size 8388736
trainer_log.jsonl CHANGED
@@ -7272,3 +7272,379 @@
7272
  {"current_steps": 36265, "total_steps": 38150, "loss": 0.1215, "lr": 3.7131374758889446e-07, "epoch": 9.505897771952817, "percentage": 95.06, "elapsed_time": "1:01:14", "remaining_time": "0:03:11", "throughput": 2645.51, "total_tokens": 9721896}
7273
  {"current_steps": 36270, "total_steps": 38150, "loss": 0.1738, "lr": 3.693524234043594e-07, "epoch": 9.507208387942333, "percentage": 95.07, "elapsed_time": "1:01:15", "remaining_time": "0:03:10", "throughput": 2645.59, "total_tokens": 9723480}
7274
  {"current_steps": 36275, "total_steps": 38150, "loss": 0.1459, "lr": 3.6739625437180457e-07, "epoch": 9.508519003931848, "percentage": 95.09, "elapsed_time": "1:01:15", "remaining_time": "0:03:09", "throughput": 2645.7, "total_tokens": 9725224}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7272
  {"current_steps": 36265, "total_steps": 38150, "loss": 0.1215, "lr": 3.7131374758889446e-07, "epoch": 9.505897771952817, "percentage": 95.06, "elapsed_time": "1:01:14", "remaining_time": "0:03:11", "throughput": 2645.51, "total_tokens": 9721896}
7273
  {"current_steps": 36270, "total_steps": 38150, "loss": 0.1738, "lr": 3.693524234043594e-07, "epoch": 9.507208387942333, "percentage": 95.07, "elapsed_time": "1:01:15", "remaining_time": "0:03:10", "throughput": 2645.59, "total_tokens": 9723480}
7274
  {"current_steps": 36275, "total_steps": 38150, "loss": 0.1459, "lr": 3.6739625437180457e-07, "epoch": 9.508519003931848, "percentage": 95.09, "elapsed_time": "1:01:15", "remaining_time": "0:03:09", "throughput": 2645.7, "total_tokens": 9725224}
7275
+ {"current_steps": 36280, "total_steps": 38150, "loss": 0.3996, "lr": 3.654452409006498e-07, "epoch": 9.509829619921362, "percentage": 95.1, "elapsed_time": "1:01:16", "remaining_time": "0:03:09", "throughput": 2645.73, "total_tokens": 9726520}
7276
+ {"current_steps": 36285, "total_steps": 38150, "loss": 0.0894, "lr": 3.634993833992434e-07, "epoch": 9.511140235910878, "percentage": 95.11, "elapsed_time": "1:01:16", "remaining_time": "0:03:08", "throughput": 2645.66, "total_tokens": 9727352}
7277
+ {"current_steps": 36290, "total_steps": 38150, "loss": 0.0975, "lr": 3.615586822748457e-07, "epoch": 9.512450851900393, "percentage": 95.12, "elapsed_time": "1:01:17", "remaining_time": "0:03:08", "throughput": 2645.72, "total_tokens": 9728760}
7278
+ {"current_steps": 36295, "total_steps": 38150, "loss": 0.0946, "lr": 3.596231379336429e-07, "epoch": 9.513761467889909, "percentage": 95.14, "elapsed_time": "1:01:17", "remaining_time": "0:03:07", "throughput": 2645.81, "total_tokens": 9730360}
7279
+ {"current_steps": 36300, "total_steps": 38150, "loss": 0.1334, "lr": 3.5769275078074425e-07, "epoch": 9.515072083879424, "percentage": 95.15, "elapsed_time": "1:01:18", "remaining_time": "0:03:07", "throughput": 2645.84, "total_tokens": 9731688}
7280
+ {"current_steps": 36305, "total_steps": 38150, "loss": 0.2273, "lr": 3.5576752122017385e-07, "epoch": 9.516382699868938, "percentage": 95.16, "elapsed_time": "1:01:18", "remaining_time": "0:03:06", "throughput": 2645.9, "total_tokens": 9733160}
7281
+ {"current_steps": 36310, "total_steps": 38150, "loss": 0.0992, "lr": 3.5384744965487883e-07, "epoch": 9.517693315858454, "percentage": 95.18, "elapsed_time": "1:01:19", "remaining_time": "0:03:06", "throughput": 2645.97, "total_tokens": 9734760}
7282
+ {"current_steps": 36315, "total_steps": 38150, "loss": 0.0892, "lr": 3.519325364867265e-07, "epoch": 9.51900393184797, "percentage": 95.19, "elapsed_time": "1:01:19", "remaining_time": "0:03:05", "throughput": 2645.99, "total_tokens": 9735960}
7283
+ {"current_steps": 36320, "total_steps": 38150, "loss": 0.1743, "lr": 3.5002278211650476e-07, "epoch": 9.520314547837483, "percentage": 95.2, "elapsed_time": "1:01:19", "remaining_time": "0:03:05", "throughput": 2646.05, "total_tokens": 9737416}
7284
+ {"current_steps": 36325, "total_steps": 38150, "loss": 0.1689, "lr": 3.4811818694392706e-07, "epoch": 9.521625163826998, "percentage": 95.22, "elapsed_time": "1:01:20", "remaining_time": "0:03:04", "throughput": 2646.11, "total_tokens": 9738888}
7285
+ {"current_steps": 36330, "total_steps": 38150, "loss": 0.0643, "lr": 3.462187513676135e-07, "epoch": 9.522935779816514, "percentage": 95.23, "elapsed_time": "1:01:20", "remaining_time": "0:03:04", "throughput": 2646.08, "total_tokens": 9739880}
7286
+ {"current_steps": 36335, "total_steps": 38150, "loss": 0.0937, "lr": 3.4432447578511826e-07, "epoch": 9.52424639580603, "percentage": 95.24, "elapsed_time": "1:01:21", "remaining_time": "0:03:03", "throughput": 2646.14, "total_tokens": 9741336}
7287
+ {"current_steps": 36340, "total_steps": 38150, "loss": 0.1941, "lr": 3.4243536059291035e-07, "epoch": 9.525557011795543, "percentage": 95.26, "elapsed_time": "1:01:21", "remaining_time": "0:03:03", "throughput": 2646.2, "total_tokens": 9742776}
7288
+ {"current_steps": 36345, "total_steps": 38150, "loss": 0.0908, "lr": 3.405514061863735e-07, "epoch": 9.526867627785059, "percentage": 95.27, "elapsed_time": "1:01:22", "remaining_time": "0:03:02", "throughput": 2646.24, "total_tokens": 9744072}
7289
+ {"current_steps": 36350, "total_steps": 38150, "loss": 0.2079, "lr": 3.3867261295982e-07, "epoch": 9.528178243774574, "percentage": 95.28, "elapsed_time": "1:01:22", "remaining_time": "0:03:02", "throughput": 2646.28, "total_tokens": 9745384}
7290
+ {"current_steps": 36355, "total_steps": 38150, "loss": 0.2697, "lr": 3.36798981306477e-07, "epoch": 9.52948885976409, "percentage": 95.29, "elapsed_time": "1:01:23", "remaining_time": "0:03:01", "throughput": 2646.44, "total_tokens": 9747400}
7291
+ {"current_steps": 36360, "total_steps": 38150, "loss": 0.1744, "lr": 3.3493051161849477e-07, "epoch": 9.530799475753604, "percentage": 95.31, "elapsed_time": "1:01:23", "remaining_time": "0:03:01", "throughput": 2646.53, "total_tokens": 9749032}
7292
+ {"current_steps": 36365, "total_steps": 38150, "loss": 0.195, "lr": 3.330672042869354e-07, "epoch": 9.53211009174312, "percentage": 95.32, "elapsed_time": "1:01:24", "remaining_time": "0:03:00", "throughput": 2646.6, "total_tokens": 9750648}
7293
+ {"current_steps": 36370, "total_steps": 38150, "loss": 0.1156, "lr": 3.312090597017925e-07, "epoch": 9.533420707732635, "percentage": 95.33, "elapsed_time": "1:01:24", "remaining_time": "0:03:00", "throughput": 2646.67, "total_tokens": 9752392}
7294
+ {"current_steps": 36375, "total_steps": 38150, "loss": 0.1068, "lr": 3.2935607825196614e-07, "epoch": 9.534731323722148, "percentage": 95.35, "elapsed_time": "1:01:25", "remaining_time": "0:02:59", "throughput": 2646.77, "total_tokens": 9754152}
7295
+ {"current_steps": 36380, "total_steps": 38150, "loss": 0.0785, "lr": 3.275082603252877e-07, "epoch": 9.536041939711664, "percentage": 95.36, "elapsed_time": "1:01:25", "remaining_time": "0:02:59", "throughput": 2646.74, "total_tokens": 9755128}
7296
+ {"current_steps": 36385, "total_steps": 38150, "loss": 0.0825, "lr": 3.2566560630849784e-07, "epoch": 9.53735255570118, "percentage": 95.37, "elapsed_time": "1:01:26", "remaining_time": "0:02:58", "throughput": 2646.76, "total_tokens": 9756408}
7297
+ {"current_steps": 36390, "total_steps": 38150, "loss": 0.1829, "lr": 3.2382811658726033e-07, "epoch": 9.538663171690695, "percentage": 95.39, "elapsed_time": "1:01:26", "remaining_time": "0:02:58", "throughput": 2646.74, "total_tokens": 9757432}
7298
+ {"current_steps": 36395, "total_steps": 38150, "loss": 0.0727, "lr": 3.219957915461619e-07, "epoch": 9.539973787680209, "percentage": 95.4, "elapsed_time": "1:01:27", "remaining_time": "0:02:57", "throughput": 2646.79, "total_tokens": 9758856}
7299
+ {"current_steps": 36400, "total_steps": 38150, "loss": 0.1351, "lr": 3.201686315687041e-07, "epoch": 9.541284403669724, "percentage": 95.41, "elapsed_time": "1:01:27", "remaining_time": "0:02:57", "throughput": 2646.98, "total_tokens": 9761496}
7300
+ {"current_steps": 36405, "total_steps": 38150, "loss": 0.145, "lr": 3.183466370373117e-07, "epoch": 9.54259501965924, "percentage": 95.43, "elapsed_time": "1:01:28", "remaining_time": "0:02:56", "throughput": 2646.98, "total_tokens": 9762616}
7301
+ {"current_steps": 36410, "total_steps": 38150, "loss": 0.1571, "lr": 3.1652980833331846e-07, "epoch": 9.543905635648755, "percentage": 95.44, "elapsed_time": "1:01:28", "remaining_time": "0:02:56", "throughput": 2647.0, "total_tokens": 9763864}
7302
+ {"current_steps": 36415, "total_steps": 38150, "loss": 0.1474, "lr": 3.147181458369897e-07, "epoch": 9.54521625163827, "percentage": 95.45, "elapsed_time": "1:01:29", "remaining_time": "0:02:55", "throughput": 2647.0, "total_tokens": 9764968}
7303
+ {"current_steps": 36420, "total_steps": 38150, "loss": 0.1954, "lr": 3.12911649927497e-07, "epoch": 9.546526867627785, "percentage": 95.47, "elapsed_time": "1:01:29", "remaining_time": "0:02:55", "throughput": 2647.06, "total_tokens": 9766424}
7304
+ {"current_steps": 36425, "total_steps": 38150, "loss": 0.0972, "lr": 3.111103209829408e-07, "epoch": 9.5478374836173, "percentage": 95.48, "elapsed_time": "1:01:30", "remaining_time": "0:02:54", "throughput": 2647.13, "total_tokens": 9767912}
7305
+ {"current_steps": 36430, "total_steps": 38150, "loss": 0.1082, "lr": 3.0931415938033336e-07, "epoch": 9.549148099606816, "percentage": 95.49, "elapsed_time": "1:01:30", "remaining_time": "0:02:54", "throughput": 2647.09, "total_tokens": 9768904}
7306
+ {"current_steps": 36435, "total_steps": 38150, "loss": 0.1571, "lr": 3.075231654956129e-07, "epoch": 9.55045871559633, "percentage": 95.5, "elapsed_time": "1:01:30", "remaining_time": "0:02:53", "throughput": 2647.18, "total_tokens": 9770552}
7307
+ {"current_steps": 36440, "total_steps": 38150, "loss": 0.2277, "lr": 3.0573733970362674e-07, "epoch": 9.551769331585845, "percentage": 95.52, "elapsed_time": "1:01:31", "remaining_time": "0:02:53", "throughput": 2647.19, "total_tokens": 9771704}
7308
+ {"current_steps": 36445, "total_steps": 38150, "loss": 0.1817, "lr": 3.0395668237814813e-07, "epoch": 9.55307994757536, "percentage": 95.53, "elapsed_time": "1:01:31", "remaining_time": "0:02:52", "throughput": 2647.29, "total_tokens": 9773320}
7309
+ {"current_steps": 36450, "total_steps": 38150, "loss": 0.1473, "lr": 3.0218119389186503e-07, "epoch": 9.554390563564876, "percentage": 95.54, "elapsed_time": "1:01:32", "remaining_time": "0:02:52", "throughput": 2647.28, "total_tokens": 9774424}
7310
+ {"current_steps": 36455, "total_steps": 38150, "loss": 0.0602, "lr": 3.0041087461638304e-07, "epoch": 9.55570117955439, "percentage": 95.56, "elapsed_time": "1:01:32", "remaining_time": "0:02:51", "throughput": 2647.23, "total_tokens": 9775336}
7311
+ {"current_steps": 36460, "total_steps": 38150, "loss": 0.2043, "lr": 2.986457249222252e-07, "epoch": 9.557011795543906, "percentage": 95.57, "elapsed_time": "1:01:33", "remaining_time": "0:02:51", "throughput": 2647.27, "total_tokens": 9776680}
7312
+ {"current_steps": 36465, "total_steps": 38150, "loss": 0.1639, "lr": 2.968857451788404e-07, "epoch": 9.558322411533421, "percentage": 95.58, "elapsed_time": "1:01:33", "remaining_time": "0:02:50", "throughput": 2647.33, "total_tokens": 9778088}
7313
+ {"current_steps": 36470, "total_steps": 38150, "loss": 0.1169, "lr": 2.951309357545812e-07, "epoch": 9.559633027522935, "percentage": 95.6, "elapsed_time": "1:01:34", "remaining_time": "0:02:50", "throughput": 2647.44, "total_tokens": 9779752}
7314
+ {"current_steps": 36475, "total_steps": 38150, "loss": 0.1098, "lr": 2.9338129701673434e-07, "epoch": 9.56094364351245, "percentage": 95.61, "elapsed_time": "1:01:34", "remaining_time": "0:02:49", "throughput": 2647.52, "total_tokens": 9781368}
7315
+ {"current_steps": 36480, "total_steps": 38150, "loss": 0.0593, "lr": 2.916368293314903e-07, "epoch": 9.562254259501966, "percentage": 95.62, "elapsed_time": "1:01:34", "remaining_time": "0:02:49", "throughput": 2647.54, "total_tokens": 9782600}
7316
+ {"current_steps": 36485, "total_steps": 38150, "loss": 0.157, "lr": 2.898975330639653e-07, "epoch": 9.563564875491481, "percentage": 95.64, "elapsed_time": "1:01:35", "remaining_time": "0:02:48", "throughput": 2647.61, "total_tokens": 9784184}
7317
+ {"current_steps": 36490, "total_steps": 38150, "loss": 0.1182, "lr": 2.8816340857819034e-07, "epoch": 9.564875491480995, "percentage": 95.65, "elapsed_time": "1:01:35", "remaining_time": "0:02:48", "throughput": 2647.67, "total_tokens": 9785672}
7318
+ {"current_steps": 36495, "total_steps": 38150, "loss": 0.1804, "lr": 2.8643445623711684e-07, "epoch": 9.56618610747051, "percentage": 95.66, "elapsed_time": "1:01:36", "remaining_time": "0:02:47", "throughput": 2647.71, "total_tokens": 9786952}
7319
+ {"current_steps": 36500, "total_steps": 38150, "loss": 0.0912, "lr": 2.847106764026081e-07, "epoch": 9.567496723460026, "percentage": 95.67, "elapsed_time": "1:01:36", "remaining_time": "0:02:47", "throughput": 2647.7, "total_tokens": 9788040}
7320
+ {"current_steps": 36505, "total_steps": 38150, "loss": 0.1348, "lr": 2.8299206943545044e-07, "epoch": 9.568807339449542, "percentage": 95.69, "elapsed_time": "1:01:37", "remaining_time": "0:02:46", "throughput": 2647.7, "total_tokens": 9789144}
7321
+ {"current_steps": 36510, "total_steps": 38150, "loss": 0.0902, "lr": 2.812786356953451e-07, "epoch": 9.570117955439056, "percentage": 95.7, "elapsed_time": "1:01:37", "remaining_time": "0:02:46", "throughput": 2647.68, "total_tokens": 9790168}
7322
+ {"current_steps": 36515, "total_steps": 38150, "loss": 0.1586, "lr": 2.795703755409107e-07, "epoch": 9.571428571428571, "percentage": 95.71, "elapsed_time": "1:01:38", "remaining_time": "0:02:45", "throughput": 2647.73, "total_tokens": 9791672}
7323
+ {"current_steps": 36520, "total_steps": 38150, "loss": 0.1582, "lr": 2.7786728932968354e-07, "epoch": 9.572739187418087, "percentage": 95.73, "elapsed_time": "1:01:38", "remaining_time": "0:02:45", "throughput": 2647.79, "total_tokens": 9793048}
7324
+ {"current_steps": 36525, "total_steps": 38150, "loss": 0.1955, "lr": 2.761693774181173e-07, "epoch": 9.574049803407602, "percentage": 95.74, "elapsed_time": "1:01:39", "remaining_time": "0:02:44", "throughput": 2647.87, "total_tokens": 9794648}
7325
+ {"current_steps": 36530, "total_steps": 38150, "loss": 0.0856, "lr": 2.7447664016157783e-07, "epoch": 9.575360419397116, "percentage": 95.75, "elapsed_time": "1:01:39", "remaining_time": "0:02:44", "throughput": 2647.91, "total_tokens": 9795928}
7326
+ {"current_steps": 36535, "total_steps": 38150, "loss": 0.1835, "lr": 2.727890779143566e-07, "epoch": 9.576671035386632, "percentage": 95.77, "elapsed_time": "1:01:39", "remaining_time": "0:02:43", "throughput": 2647.92, "total_tokens": 9797064}
7327
+ {"current_steps": 36540, "total_steps": 38150, "loss": 0.1122, "lr": 2.7110669102965723e-07, "epoch": 9.577981651376147, "percentage": 95.78, "elapsed_time": "1:01:40", "remaining_time": "0:02:43", "throughput": 2647.9, "total_tokens": 9798120}
7328
+ {"current_steps": 36545, "total_steps": 38150, "loss": 0.1762, "lr": 2.694294798595981e-07, "epoch": 9.579292267365663, "percentage": 95.79, "elapsed_time": "1:01:40", "remaining_time": "0:02:42", "throughput": 2647.9, "total_tokens": 9799336}
7329
+ {"current_steps": 36550, "total_steps": 38150, "loss": 0.1489, "lr": 2.677574447552178e-07, "epoch": 9.580602883355176, "percentage": 95.81, "elapsed_time": "1:01:41", "remaining_time": "0:02:42", "throughput": 2647.93, "total_tokens": 9800664}
7330
+ {"current_steps": 36555, "total_steps": 38150, "loss": 0.0675, "lr": 2.660905860664697e-07, "epoch": 9.581913499344692, "percentage": 95.82, "elapsed_time": "1:01:41", "remaining_time": "0:02:41", "throughput": 2647.89, "total_tokens": 9801656}
7331
+ {"current_steps": 36560, "total_steps": 38150, "loss": 0.1212, "lr": 2.6442890414222487e-07, "epoch": 9.583224115334207, "percentage": 95.83, "elapsed_time": "1:01:42", "remaining_time": "0:02:41", "throughput": 2648.0, "total_tokens": 9803416}
7332
+ {"current_steps": 36565, "total_steps": 38150, "loss": 0.1392, "lr": 2.6277239933027163e-07, "epoch": 9.584534731323721, "percentage": 95.85, "elapsed_time": "1:01:42", "remaining_time": "0:02:40", "throughput": 2648.01, "total_tokens": 9804568}
7333
+ {"current_steps": 36570, "total_steps": 38150, "loss": 0.0681, "lr": 2.611210719773133e-07, "epoch": 9.585845347313237, "percentage": 95.86, "elapsed_time": "1:01:43", "remaining_time": "0:02:39", "throughput": 2647.97, "total_tokens": 9805528}
7334
+ {"current_steps": 36575, "total_steps": 38150, "loss": 0.0709, "lr": 2.5947492242896776e-07, "epoch": 9.587155963302752, "percentage": 95.87, "elapsed_time": "1:01:43", "remaining_time": "0:02:39", "throughput": 2647.96, "total_tokens": 9806616}
7335
+ {"current_steps": 36580, "total_steps": 38150, "loss": 0.178, "lr": 2.5783395102977057e-07, "epoch": 9.588466579292268, "percentage": 95.88, "elapsed_time": "1:01:43", "remaining_time": "0:02:38", "throughput": 2648.01, "total_tokens": 9807944}
7336
+ {"current_steps": 36585, "total_steps": 38150, "loss": 0.1877, "lr": 2.5619815812318037e-07, "epoch": 9.589777195281782, "percentage": 95.9, "elapsed_time": "1:01:44", "remaining_time": "0:02:38", "throughput": 2648.05, "total_tokens": 9809272}
7337
+ {"current_steps": 36590, "total_steps": 38150, "loss": 0.1827, "lr": 2.5456754405155934e-07, "epoch": 9.591087811271297, "percentage": 95.91, "elapsed_time": "1:01:44", "remaining_time": "0:02:37", "throughput": 2648.15, "total_tokens": 9811080}
7338
+ {"current_steps": 36595, "total_steps": 38150, "loss": 0.1156, "lr": 2.5294210915619564e-07, "epoch": 9.592398427260813, "percentage": 95.92, "elapsed_time": "1:01:45", "remaining_time": "0:02:37", "throughput": 2648.09, "total_tokens": 9811960}
7339
+ {"current_steps": 36600, "total_steps": 38150, "loss": 0.0805, "lr": 2.5132185377728656e-07, "epoch": 9.593709043250328, "percentage": 95.94, "elapsed_time": "1:01:45", "remaining_time": "0:02:36", "throughput": 2648.08, "total_tokens": 9813048}
7340
+ {"current_steps": 36605, "total_steps": 38150, "loss": 0.0964, "lr": 2.497067782539553e-07, "epoch": 9.595019659239842, "percentage": 95.95, "elapsed_time": "1:01:46", "remaining_time": "0:02:36", "throughput": 2648.08, "total_tokens": 9814168}
7341
+ {"current_steps": 36610, "total_steps": 38150, "loss": 0.0759, "lr": 2.480968829242286e-07, "epoch": 9.596330275229358, "percentage": 95.96, "elapsed_time": "1:01:46", "remaining_time": "0:02:35", "throughput": 2648.09, "total_tokens": 9815416}
7342
+ {"current_steps": 36615, "total_steps": 38150, "loss": 0.129, "lr": 2.4649216812505373e-07, "epoch": 9.597640891218873, "percentage": 95.98, "elapsed_time": "1:01:47", "remaining_time": "0:02:35", "throughput": 2648.11, "total_tokens": 9816632}
7343
+ {"current_steps": 36620, "total_steps": 38150, "loss": 0.0692, "lr": 2.448926341923008e-07, "epoch": 9.598951507208389, "percentage": 95.99, "elapsed_time": "1:01:47", "remaining_time": "0:02:34", "throughput": 2648.1, "total_tokens": 9817736}
7344
+ {"current_steps": 36625, "total_steps": 38150, "loss": 0.1438, "lr": 2.4329828146074095e-07, "epoch": 9.600262123197902, "percentage": 96.0, "elapsed_time": "1:01:47", "remaining_time": "0:02:34", "throughput": 2648.19, "total_tokens": 9819432}
7345
+ {"current_steps": 36630, "total_steps": 38150, "loss": 0.1348, "lr": 2.417091102640795e-07, "epoch": 9.601572739187418, "percentage": 96.02, "elapsed_time": "1:01:48", "remaining_time": "0:02:33", "throughput": 2648.29, "total_tokens": 9821096}
7346
+ {"current_steps": 36635, "total_steps": 38150, "loss": 0.2069, "lr": 2.401251209349198e-07, "epoch": 9.602883355176933, "percentage": 96.03, "elapsed_time": "1:01:48", "remaining_time": "0:02:33", "throughput": 2648.35, "total_tokens": 9822584}
7347
+ {"current_steps": 36640, "total_steps": 38150, "loss": 0.1228, "lr": 2.385463138047911e-07, "epoch": 9.604193971166449, "percentage": 96.04, "elapsed_time": "1:01:49", "remaining_time": "0:02:32", "throughput": 2648.36, "total_tokens": 9823720}
7348
+ {"current_steps": 36645, "total_steps": 38150, "loss": 0.1411, "lr": 2.3697268920413462e-07, "epoch": 9.605504587155963, "percentage": 96.06, "elapsed_time": "1:01:49", "remaining_time": "0:02:32", "throughput": 2648.4, "total_tokens": 9824984}
7349
+ {"current_steps": 36650, "total_steps": 38150, "loss": 0.3382, "lr": 2.3540424746230916e-07, "epoch": 9.606815203145478, "percentage": 96.07, "elapsed_time": "1:01:50", "remaining_time": "0:02:31", "throughput": 2648.4, "total_tokens": 9826152}
7350
+ {"current_steps": 36655, "total_steps": 38150, "loss": 0.0945, "lr": 2.3384098890758267e-07, "epoch": 9.608125819134994, "percentage": 96.08, "elapsed_time": "1:01:50", "remaining_time": "0:02:31", "throughput": 2648.39, "total_tokens": 9827224}
7351
+ {"current_steps": 36660, "total_steps": 38150, "loss": 0.0874, "lr": 2.322829138671434e-07, "epoch": 9.609436435124508, "percentage": 96.09, "elapsed_time": "1:01:51", "remaining_time": "0:02:30", "throughput": 2648.42, "total_tokens": 9828472}
7352
+ {"current_steps": 36665, "total_steps": 38150, "loss": 0.1032, "lr": 2.3073002266709443e-07, "epoch": 9.610747051114023, "percentage": 96.11, "elapsed_time": "1:01:51", "remaining_time": "0:02:30", "throughput": 2648.43, "total_tokens": 9829704}
7353
+ {"current_steps": 36670, "total_steps": 38150, "loss": 0.0851, "lr": 2.2918231563245629e-07, "epoch": 9.612057667103539, "percentage": 96.12, "elapsed_time": "1:01:52", "remaining_time": "0:02:29", "throughput": 2648.49, "total_tokens": 9831224}
7354
+ {"current_steps": 36675, "total_steps": 38150, "loss": 0.1299, "lr": 2.2763979308715877e-07, "epoch": 9.613368283093054, "percentage": 96.13, "elapsed_time": "1:01:52", "remaining_time": "0:02:29", "throughput": 2648.56, "total_tokens": 9832744}
7355
+ {"current_steps": 36680, "total_steps": 38150, "loss": 0.1377, "lr": 2.2610245535404638e-07, "epoch": 9.614678899082568, "percentage": 96.15, "elapsed_time": "1:01:52", "remaining_time": "0:02:28", "throughput": 2648.56, "total_tokens": 9833960}
7356
+ {"current_steps": 36685, "total_steps": 38150, "loss": 0.156, "lr": 2.2457030275488676e-07, "epoch": 9.615989515072084, "percentage": 96.16, "elapsed_time": "1:01:53", "remaining_time": "0:02:28", "throughput": 2648.69, "total_tokens": 9835960}
7357
+ {"current_steps": 36690, "total_steps": 38150, "loss": 0.1339, "lr": 2.2304333561035396e-07, "epoch": 9.617300131061599, "percentage": 96.17, "elapsed_time": "1:01:53", "remaining_time": "0:02:27", "throughput": 2648.66, "total_tokens": 9836936}
7358
+ {"current_steps": 36695, "total_steps": 38150, "loss": 0.1013, "lr": 2.2152155424003952e-07, "epoch": 9.618610747051115, "percentage": 96.19, "elapsed_time": "1:01:54", "remaining_time": "0:02:27", "throughput": 2648.72, "total_tokens": 9838296}
7359
+ {"current_steps": 36700, "total_steps": 38150, "loss": 0.3366, "lr": 2.2000495896244976e-07, "epoch": 9.619921363040628, "percentage": 96.2, "elapsed_time": "1:01:54", "remaining_time": "0:02:26", "throughput": 2648.81, "total_tokens": 9839912}
7360
+ {"current_steps": 36705, "total_steps": 38150, "loss": 0.1301, "lr": 2.1849355009500582e-07, "epoch": 9.621231979030144, "percentage": 96.21, "elapsed_time": "1:01:55", "remaining_time": "0:02:26", "throughput": 2648.82, "total_tokens": 9841096}
7361
+ {"current_steps": 36710, "total_steps": 38150, "loss": 0.0969, "lr": 2.1698732795404074e-07, "epoch": 9.62254259501966, "percentage": 96.23, "elapsed_time": "1:01:55", "remaining_time": "0:02:25", "throughput": 2648.88, "total_tokens": 9842600}
7362
+ {"current_steps": 36715, "total_steps": 38150, "loss": 0.0826, "lr": 2.1548629285480792e-07, "epoch": 9.623853211009175, "percentage": 96.24, "elapsed_time": "1:01:56", "remaining_time": "0:02:25", "throughput": 2649.0, "total_tokens": 9844456}
7363
+ {"current_steps": 36720, "total_steps": 38150, "loss": 0.0845, "lr": 2.1399044511146993e-07, "epoch": 9.625163826998689, "percentage": 96.25, "elapsed_time": "1:01:56", "remaining_time": "0:02:24", "throughput": 2649.06, "total_tokens": 9845944}
7364
+ {"current_steps": 36725, "total_steps": 38150, "loss": 0.1206, "lr": 2.124997850371041e-07, "epoch": 9.626474442988204, "percentage": 96.26, "elapsed_time": "1:01:57", "remaining_time": "0:02:24", "throughput": 2649.13, "total_tokens": 9847400}
7365
+ {"current_steps": 36730, "total_steps": 38150, "loss": 0.1642, "lr": 2.1101431294370533e-07, "epoch": 9.62778505897772, "percentage": 96.28, "elapsed_time": "1:01:57", "remaining_time": "0:02:23", "throughput": 2649.11, "total_tokens": 9848440}
7366
+ {"current_steps": 36735, "total_steps": 38150, "loss": 0.1629, "lr": 2.0953402914217768e-07, "epoch": 9.629095674967235, "percentage": 96.29, "elapsed_time": "1:01:58", "remaining_time": "0:02:23", "throughput": 2649.15, "total_tokens": 9849816}
7367
+ {"current_steps": 36740, "total_steps": 38150, "loss": 0.1517, "lr": 2.0805893394234e-07, "epoch": 9.63040629095675, "percentage": 96.3, "elapsed_time": "1:01:58", "remaining_time": "0:02:22", "throughput": 2649.22, "total_tokens": 9851304}
7368
+ {"current_steps": 36745, "total_steps": 38150, "loss": 0.1043, "lr": 2.065890276529342e-07, "epoch": 9.631716906946265, "percentage": 96.32, "elapsed_time": "1:01:58", "remaining_time": "0:02:22", "throughput": 2649.26, "total_tokens": 9852568}
7369
+ {"current_steps": 36750, "total_steps": 38150, "loss": 0.1554, "lr": 2.051243105816031e-07, "epoch": 9.63302752293578, "percentage": 96.33, "elapsed_time": "1:01:59", "remaining_time": "0:02:21", "throughput": 2649.33, "total_tokens": 9854056}
7370
+ {"current_steps": 36755, "total_steps": 38150, "loss": 0.082, "lr": 2.0366478303491255e-07, "epoch": 9.634338138925294, "percentage": 96.34, "elapsed_time": "1:01:59", "remaining_time": "0:02:21", "throughput": 2649.34, "total_tokens": 9855240}
7371
+ {"current_steps": 36760, "total_steps": 38150, "loss": 0.1635, "lr": 2.0221044531834043e-07, "epoch": 9.63564875491481, "percentage": 96.36, "elapsed_time": "1:02:00", "remaining_time": "0:02:20", "throughput": 2649.4, "total_tokens": 9856760}
7372
+ {"current_steps": 36765, "total_steps": 38150, "loss": 0.1282, "lr": 2.0076129773627105e-07, "epoch": 9.636959370904325, "percentage": 96.37, "elapsed_time": "1:02:00", "remaining_time": "0:02:20", "throughput": 2649.47, "total_tokens": 9858360}
7373
+ {"current_steps": 36770, "total_steps": 38150, "loss": 0.1528, "lr": 1.9931734059201457e-07, "epoch": 9.63826998689384, "percentage": 96.38, "elapsed_time": "1:02:01", "remaining_time": "0:02:19", "throughput": 2649.51, "total_tokens": 9859624}
7374
+ {"current_steps": 36775, "total_steps": 38150, "loss": 0.0667, "lr": 1.9787857418778478e-07, "epoch": 9.639580602883354, "percentage": 96.4, "elapsed_time": "1:02:01", "remaining_time": "0:02:19", "throughput": 2649.53, "total_tokens": 9860856}
7375
+ {"current_steps": 36780, "total_steps": 38150, "loss": 0.1241, "lr": 1.9644499882471578e-07, "epoch": 9.64089121887287, "percentage": 96.41, "elapsed_time": "1:02:02", "remaining_time": "0:02:18", "throughput": 2649.59, "total_tokens": 9862296}
7376
+ {"current_steps": 36785, "total_steps": 38150, "loss": 0.1271, "lr": 1.9501661480285095e-07, "epoch": 9.642201834862385, "percentage": 96.42, "elapsed_time": "1:02:02", "remaining_time": "0:02:18", "throughput": 2649.67, "total_tokens": 9863784}
7377
+ {"current_steps": 36790, "total_steps": 38150, "loss": 0.1759, "lr": 1.935934224211483e-07, "epoch": 9.643512450851901, "percentage": 96.44, "elapsed_time": "1:02:03", "remaining_time": "0:02:17", "throughput": 2649.74, "total_tokens": 9865288}
7378
+ {"current_steps": 36795, "total_steps": 38150, "loss": 0.0839, "lr": 1.9217542197748074e-07, "epoch": 9.644823066841415, "percentage": 96.45, "elapsed_time": "1:02:03", "remaining_time": "0:02:17", "throughput": 2649.75, "total_tokens": 9866504}
7379
+ {"current_steps": 36800, "total_steps": 38150, "loss": 0.1614, "lr": 1.9076261376863303e-07, "epoch": 9.64613368283093, "percentage": 96.46, "elapsed_time": "1:02:03", "remaining_time": "0:02:16", "throughput": 2649.72, "total_tokens": 9867480}
7380
+ {"current_steps": 36805, "total_steps": 38150, "loss": 0.1588, "lr": 1.8935499809030476e-07, "epoch": 9.647444298820446, "percentage": 96.47, "elapsed_time": "1:02:04", "remaining_time": "0:02:16", "throughput": 2649.84, "total_tokens": 9869256}
7381
+ {"current_steps": 36810, "total_steps": 38150, "loss": 0.1095, "lr": 1.8795257523710197e-07, "epoch": 9.648754914809961, "percentage": 96.49, "elapsed_time": "1:02:05", "remaining_time": "0:02:15", "throughput": 2649.96, "total_tokens": 9871160}
7382
+ {"current_steps": 36815, "total_steps": 38150, "loss": 0.1181, "lr": 1.8655534550255648e-07, "epoch": 9.650065530799475, "percentage": 96.5, "elapsed_time": "1:02:05", "remaining_time": "0:02:15", "throughput": 2650.01, "total_tokens": 9872504}
7383
+ {"current_steps": 36820, "total_steps": 38150, "loss": 0.0975, "lr": 1.8516330917910108e-07, "epoch": 9.65137614678899, "percentage": 96.51, "elapsed_time": "1:02:05", "remaining_time": "0:02:14", "throughput": 2650.07, "total_tokens": 9873928}
7384
+ {"current_steps": 36825, "total_steps": 38150, "loss": 0.1904, "lr": 1.8377646655808877e-07, "epoch": 9.652686762778506, "percentage": 96.53, "elapsed_time": "1:02:06", "remaining_time": "0:02:14", "throughput": 2650.13, "total_tokens": 9875368}
7385
+ {"current_steps": 36830, "total_steps": 38150, "loss": 0.1047, "lr": 1.8239481792978464e-07, "epoch": 9.653997378768022, "percentage": 96.54, "elapsed_time": "1:02:06", "remaining_time": "0:02:13", "throughput": 2650.15, "total_tokens": 9876584}
7386
+ {"current_steps": 36835, "total_steps": 38150, "loss": 0.1226, "lr": 1.8101836358336287e-07, "epoch": 9.655307994757536, "percentage": 96.55, "elapsed_time": "1:02:07", "remaining_time": "0:02:13", "throughput": 2650.3, "total_tokens": 9878584}
7387
+ {"current_steps": 36840, "total_steps": 38150, "loss": 0.1224, "lr": 1.7964710380691251e-07, "epoch": 9.656618610747051, "percentage": 96.57, "elapsed_time": "1:02:07", "remaining_time": "0:02:12", "throughput": 2650.24, "total_tokens": 9879432}
7388
+ {"current_steps": 36845, "total_steps": 38150, "loss": 0.1134, "lr": 1.782810388874373e-07, "epoch": 9.657929226736567, "percentage": 96.58, "elapsed_time": "1:02:08", "remaining_time": "0:02:12", "throughput": 2650.27, "total_tokens": 9880744}
7389
+ {"current_steps": 36850, "total_steps": 38150, "loss": 0.1709, "lr": 1.7692016911085295e-07, "epoch": 9.65923984272608, "percentage": 96.59, "elapsed_time": "1:02:08", "remaining_time": "0:02:11", "throughput": 2650.33, "total_tokens": 9882280}
7390
+ {"current_steps": 36855, "total_steps": 38150, "loss": 0.2123, "lr": 1.7556449476198445e-07, "epoch": 9.660550458715596, "percentage": 96.61, "elapsed_time": "1:02:09", "remaining_time": "0:02:11", "throughput": 2650.41, "total_tokens": 9883816}
7391
+ {"current_steps": 36860, "total_steps": 38150, "loss": 0.1901, "lr": 1.7421401612457423e-07, "epoch": 9.661861074705111, "percentage": 96.62, "elapsed_time": "1:02:09", "remaining_time": "0:02:10", "throughput": 2650.51, "total_tokens": 9885512}
7392
+ {"current_steps": 36865, "total_steps": 38150, "loss": 0.085, "lr": 1.7286873348127676e-07, "epoch": 9.663171690694627, "percentage": 96.63, "elapsed_time": "1:02:10", "remaining_time": "0:02:10", "throughput": 2650.51, "total_tokens": 9886600}
7393
+ {"current_steps": 36870, "total_steps": 38150, "loss": 0.1106, "lr": 1.715286471136529e-07, "epoch": 9.66448230668414, "percentage": 96.64, "elapsed_time": "1:02:10", "remaining_time": "0:02:09", "throughput": 2650.48, "total_tokens": 9887592}
7394
+ {"current_steps": 36875, "total_steps": 38150, "loss": 0.1021, "lr": 1.7019375730218667e-07, "epoch": 9.665792922673656, "percentage": 96.66, "elapsed_time": "1:02:10", "remaining_time": "0:02:09", "throughput": 2650.43, "total_tokens": 9888504}
7395
+ {"current_steps": 36880, "total_steps": 38150, "loss": 0.164, "lr": 1.688640643262629e-07, "epoch": 9.667103538663172, "percentage": 96.67, "elapsed_time": "1:02:11", "remaining_time": "0:02:08", "throughput": 2650.45, "total_tokens": 9889688}
7396
+ {"current_steps": 36885, "total_steps": 38150, "loss": 0.2311, "lr": 1.6753956846418395e-07, "epoch": 9.668414154652687, "percentage": 96.68, "elapsed_time": "1:02:11", "remaining_time": "0:02:07", "throughput": 2650.53, "total_tokens": 9891304}
7397
+ {"current_steps": 36890, "total_steps": 38150, "loss": 0.104, "lr": 1.6622026999316697e-07, "epoch": 9.669724770642201, "percentage": 96.7, "elapsed_time": "1:02:12", "remaining_time": "0:02:07", "throughput": 2650.65, "total_tokens": 9893064}
7398
+ {"current_steps": 36895, "total_steps": 38150, "loss": 0.1408, "lr": 1.6490616918933554e-07, "epoch": 9.671035386631717, "percentage": 96.71, "elapsed_time": "1:02:12", "remaining_time": "0:02:06", "throughput": 2650.74, "total_tokens": 9894680}
7399
+ {"current_steps": 36900, "total_steps": 38150, "loss": 0.1382, "lr": 1.6359726632773353e-07, "epoch": 9.672346002621232, "percentage": 96.72, "elapsed_time": "1:02:13", "remaining_time": "0:02:06", "throughput": 2650.77, "total_tokens": 9896056}
7400
+ {"current_steps": 36905, "total_steps": 38150, "loss": 0.1903, "lr": 1.6229356168231125e-07, "epoch": 9.673656618610748, "percentage": 96.74, "elapsed_time": "1:02:13", "remaining_time": "0:02:05", "throughput": 2650.82, "total_tokens": 9897448}
7401
+ {"current_steps": 36910, "total_steps": 38150, "loss": 0.1601, "lr": 1.609950555259282e-07, "epoch": 9.674967234600262, "percentage": 96.75, "elapsed_time": "1:02:14", "remaining_time": "0:02:05", "throughput": 2650.92, "total_tokens": 9899128}
7402
+ {"current_steps": 36915, "total_steps": 38150, "loss": 0.1448, "lr": 1.5970174813036144e-07, "epoch": 9.676277850589777, "percentage": 96.76, "elapsed_time": "1:02:14", "remaining_time": "0:02:04", "throughput": 2650.98, "total_tokens": 9900552}
7403
+ {"current_steps": 36920, "total_steps": 38150, "loss": 0.1916, "lr": 1.5841363976629998e-07, "epoch": 9.677588466579293, "percentage": 96.78, "elapsed_time": "1:02:15", "remaining_time": "0:02:04", "throughput": 2651.0, "total_tokens": 9901784}
7404
+ {"current_steps": 36925, "total_steps": 38150, "loss": 0.1158, "lr": 1.5713073070333927e-07, "epoch": 9.678899082568808, "percentage": 96.79, "elapsed_time": "1:02:15", "remaining_time": "0:02:03", "throughput": 2650.99, "total_tokens": 9902968}
7405
+ {"current_steps": 36930, "total_steps": 38150, "loss": 0.1522, "lr": 1.5585302120998956e-07, "epoch": 9.680209698558322, "percentage": 96.8, "elapsed_time": "1:02:16", "remaining_time": "0:02:03", "throughput": 2651.06, "total_tokens": 9904472}
7406
+ {"current_steps": 36935, "total_steps": 38150, "loss": 0.1691, "lr": 1.545805115536786e-07, "epoch": 9.681520314547837, "percentage": 96.82, "elapsed_time": "1:02:16", "remaining_time": "0:02:02", "throughput": 2651.09, "total_tokens": 9905768}
7407
+ {"current_steps": 36940, "total_steps": 38150, "loss": 0.1111, "lr": 1.5331320200073497e-07, "epoch": 9.682830930537353, "percentage": 96.83, "elapsed_time": "1:02:17", "remaining_time": "0:02:02", "throughput": 2651.23, "total_tokens": 9907912}
7408
+ {"current_steps": 36945, "total_steps": 38150, "loss": 0.0536, "lr": 1.520510928164076e-07, "epoch": 9.684141546526867, "percentage": 96.84, "elapsed_time": "1:02:17", "remaining_time": "0:02:01", "throughput": 2651.22, "total_tokens": 9908984}
7409
+ {"current_steps": 36950, "total_steps": 38150, "loss": 0.2044, "lr": 1.5079418426485193e-07, "epoch": 9.685452162516382, "percentage": 96.85, "elapsed_time": "1:02:17", "remaining_time": "0:02:01", "throughput": 2651.24, "total_tokens": 9910216}
7410
+ {"current_steps": 36955, "total_steps": 38150, "loss": 0.1604, "lr": 1.49542476609138e-07, "epoch": 9.686762778505898, "percentage": 96.87, "elapsed_time": "1:02:18", "remaining_time": "0:02:00", "throughput": 2651.27, "total_tokens": 9911544}
7411
+ {"current_steps": 36960, "total_steps": 38150, "loss": 0.1311, "lr": 1.482959701112452e-07, "epoch": 9.688073394495413, "percentage": 96.88, "elapsed_time": "1:02:18", "remaining_time": "0:02:00", "throughput": 2651.35, "total_tokens": 9913352}
7412
+ {"current_steps": 36965, "total_steps": 38150, "loss": 0.1137, "lr": 1.47054665032062e-07, "epoch": 9.689384010484927, "percentage": 96.89, "elapsed_time": "1:02:19", "remaining_time": "0:01:59", "throughput": 2651.33, "total_tokens": 9914376}
7413
+ {"current_steps": 36970, "total_steps": 38150, "loss": 0.1273, "lr": 1.4581856163140008e-07, "epoch": 9.690694626474443, "percentage": 96.91, "elapsed_time": "1:02:19", "remaining_time": "0:01:59", "throughput": 2651.31, "total_tokens": 9915448}
7414
+ {"current_steps": 36975, "total_steps": 38150, "loss": 0.1952, "lr": 1.4458766016796632e-07, "epoch": 9.692005242463958, "percentage": 96.92, "elapsed_time": "1:02:20", "remaining_time": "0:01:58", "throughput": 2651.37, "total_tokens": 9916936}
7415
+ {"current_steps": 36980, "total_steps": 38150, "loss": 0.1393, "lr": 1.4336196089938802e-07, "epoch": 9.693315858453474, "percentage": 96.93, "elapsed_time": "1:02:20", "remaining_time": "0:01:58", "throughput": 2651.4, "total_tokens": 9918200}
7416
+ {"current_steps": 36985, "total_steps": 38150, "loss": 0.1943, "lr": 1.4214146408220163e-07, "epoch": 9.694626474442988, "percentage": 96.95, "elapsed_time": "1:02:21", "remaining_time": "0:01:57", "throughput": 2651.55, "total_tokens": 9920120}
7417
+ {"current_steps": 36990, "total_steps": 38150, "loss": 0.1336, "lr": 1.4092616997185837e-07, "epoch": 9.695937090432503, "percentage": 96.96, "elapsed_time": "1:02:21", "remaining_time": "0:01:57", "throughput": 2651.59, "total_tokens": 9921464}
7418
+ {"current_steps": 36995, "total_steps": 38150, "loss": 0.2762, "lr": 1.397160788227131e-07, "epoch": 9.697247706422019, "percentage": 96.97, "elapsed_time": "1:02:22", "remaining_time": "0:01:56", "throughput": 2651.67, "total_tokens": 9923128}
7419
+ {"current_steps": 37000, "total_steps": 38150, "loss": 0.1702, "lr": 1.385111908880382e-07, "epoch": 9.698558322411534, "percentage": 96.99, "elapsed_time": "1:02:22", "remaining_time": "0:01:56", "throughput": 2651.71, "total_tokens": 9924408}
7420
+ {"current_steps": 37005, "total_steps": 38150, "loss": 0.2028, "lr": 1.3731150642001255e-07, "epoch": 9.699868938401048, "percentage": 97.0, "elapsed_time": "1:02:23", "remaining_time": "0:01:55", "throughput": 2651.77, "total_tokens": 9925912}
7421
+ {"current_steps": 37010, "total_steps": 38150, "loss": 0.1353, "lr": 1.3611702566972694e-07, "epoch": 9.701179554390563, "percentage": 97.01, "elapsed_time": "1:02:23", "remaining_time": "0:01:55", "throughput": 2651.76, "total_tokens": 9927000}
7422
+ {"current_steps": 37015, "total_steps": 38150, "loss": 0.1407, "lr": 1.3492774888718974e-07, "epoch": 9.702490170380079, "percentage": 97.02, "elapsed_time": "1:02:23", "remaining_time": "0:01:54", "throughput": 2651.73, "total_tokens": 9927992}
7423
+ {"current_steps": 37020, "total_steps": 38150, "loss": 0.2649, "lr": 1.3374367632131014e-07, "epoch": 9.703800786369595, "percentage": 97.04, "elapsed_time": "1:02:24", "remaining_time": "0:01:54", "throughput": 2651.8, "total_tokens": 9929448}
7424
+ {"current_steps": 37025, "total_steps": 38150, "loss": 0.1376, "lr": 1.3256480821991213e-07, "epoch": 9.705111402359108, "percentage": 97.05, "elapsed_time": "1:02:24", "remaining_time": "0:01:53", "throughput": 2651.84, "total_tokens": 9930808}
7425
+ {"current_steps": 37030, "total_steps": 38150, "loss": 0.1696, "lr": 1.3139114482973448e-07, "epoch": 9.706422018348624, "percentage": 97.06, "elapsed_time": "1:02:25", "remaining_time": "0:01:53", "throughput": 2651.9, "total_tokens": 9932232}
7426
+ {"current_steps": 37035, "total_steps": 38150, "loss": 0.2084, "lr": 1.3022268639641956e-07, "epoch": 9.70773263433814, "percentage": 97.08, "elapsed_time": "1:02:25", "remaining_time": "0:01:52", "throughput": 2651.94, "total_tokens": 9933656}
7427
+ {"current_steps": 37040, "total_steps": 38150, "loss": 0.1302, "lr": 1.290594331645245e-07, "epoch": 9.709043250327653, "percentage": 97.09, "elapsed_time": "1:02:26", "remaining_time": "0:01:52", "throughput": 2652.02, "total_tokens": 9935368}
7428
+ {"current_steps": 37045, "total_steps": 38150, "loss": 0.0587, "lr": 1.279013853775185e-07, "epoch": 9.710353866317169, "percentage": 97.1, "elapsed_time": "1:02:26", "remaining_time": "0:01:51", "throughput": 2651.99, "total_tokens": 9936376}
7429
+ {"current_steps": 37050, "total_steps": 38150, "loss": 0.1608, "lr": 1.267485432777743e-07, "epoch": 9.711664482306684, "percentage": 97.12, "elapsed_time": "1:02:27", "remaining_time": "0:01:51", "throughput": 2651.98, "total_tokens": 9937464}
7430
+ {"current_steps": 37055, "total_steps": 38150, "loss": 0.1002, "lr": 1.2560090710658223e-07, "epoch": 9.7129750982962, "percentage": 97.13, "elapsed_time": "1:02:27", "remaining_time": "0:01:50", "throughput": 2652.01, "total_tokens": 9938696}
7431
+ {"current_steps": 37060, "total_steps": 38150, "loss": 0.0309, "lr": 1.2445847710414183e-07, "epoch": 9.714285714285714, "percentage": 97.14, "elapsed_time": "1:02:28", "remaining_time": "0:01:50", "throughput": 2651.97, "total_tokens": 9939656}
7432
+ {"current_steps": 37065, "total_steps": 38150, "loss": 0.1133, "lr": 1.233212535095618e-07, "epoch": 9.715596330275229, "percentage": 97.16, "elapsed_time": "1:02:28", "remaining_time": "0:01:49", "throughput": 2651.99, "total_tokens": 9940888}
7433
+ {"current_steps": 37070, "total_steps": 38150, "loss": 0.0987, "lr": 1.2218923656085735e-07, "epoch": 9.716906946264745, "percentage": 97.17, "elapsed_time": "1:02:28", "remaining_time": "0:01:49", "throughput": 2651.99, "total_tokens": 9942024}
7434
+ {"current_steps": 37075, "total_steps": 38150, "loss": 0.1676, "lr": 1.2106242649496112e-07, "epoch": 9.71821756225426, "percentage": 97.18, "elapsed_time": "1:02:29", "remaining_time": "0:01:48", "throughput": 2652.02, "total_tokens": 9943336}
7435
+ {"current_steps": 37080, "total_steps": 38150, "loss": 0.3596, "lr": 1.199408235477123e-07, "epoch": 9.719528178243774, "percentage": 97.2, "elapsed_time": "1:02:29", "remaining_time": "0:01:48", "throughput": 2652.13, "total_tokens": 9945096}
7436
+ {"current_steps": 37085, "total_steps": 38150, "loss": 0.1564, "lr": 1.188244279538564e-07, "epoch": 9.72083879423329, "percentage": 97.21, "elapsed_time": "1:02:30", "remaining_time": "0:01:47", "throughput": 2652.16, "total_tokens": 9946344}
7437
+ {"current_steps": 37090, "total_steps": 38150, "loss": 0.0906, "lr": 1.1771323994705929e-07, "epoch": 9.722149410222805, "percentage": 97.22, "elapsed_time": "1:02:30", "remaining_time": "0:01:47", "throughput": 2652.18, "total_tokens": 9947688}
7438
+ {"current_steps": 37095, "total_steps": 38150, "loss": 0.1409, "lr": 1.1660725975988773e-07, "epoch": 9.72346002621232, "percentage": 97.23, "elapsed_time": "1:02:31", "remaining_time": "0:01:46", "throughput": 2652.34, "total_tokens": 9949816}
7439
+ {"current_steps": 37100, "total_steps": 38150, "loss": 0.2164, "lr": 1.1550648762382044e-07, "epoch": 9.724770642201834, "percentage": 97.25, "elapsed_time": "1:02:31", "remaining_time": "0:01:46", "throughput": 2652.37, "total_tokens": 9951112}
7440
+ {"current_steps": 37105, "total_steps": 38150, "loss": 0.1131, "lr": 1.144109237692509e-07, "epoch": 9.72608125819135, "percentage": 97.26, "elapsed_time": "1:02:32", "remaining_time": "0:01:45", "throughput": 2652.39, "total_tokens": 9952312}
7441
+ {"current_steps": 37110, "total_steps": 38150, "loss": 0.1201, "lr": 1.1332056842547344e-07, "epoch": 9.727391874180865, "percentage": 97.27, "elapsed_time": "1:02:32", "remaining_time": "0:01:45", "throughput": 2652.39, "total_tokens": 9953448}
7442
+ {"current_steps": 37115, "total_steps": 38150, "loss": 0.1869, "lr": 1.1223542182070002e-07, "epoch": 9.728702490170381, "percentage": 97.29, "elapsed_time": "1:02:33", "remaining_time": "0:01:44", "throughput": 2652.46, "total_tokens": 9954936}
7443
+ {"current_steps": 37120, "total_steps": 38150, "loss": 0.2003, "lr": 1.1115548418205168e-07, "epoch": 9.730013106159895, "percentage": 97.3, "elapsed_time": "1:02:33", "remaining_time": "0:01:44", "throughput": 2652.5, "total_tokens": 9956216}
7444
+ {"current_steps": 37125, "total_steps": 38150, "loss": 0.1544, "lr": 1.1008075573555599e-07, "epoch": 9.73132372214941, "percentage": 97.31, "elapsed_time": "1:02:33", "remaining_time": "0:01:43", "throughput": 2652.51, "total_tokens": 9957416}
7445
+ {"current_steps": 37130, "total_steps": 38150, "loss": 0.0887, "lr": 1.0901123670614965e-07, "epoch": 9.732634338138926, "percentage": 97.33, "elapsed_time": "1:02:34", "remaining_time": "0:01:43", "throughput": 2652.5, "total_tokens": 9958504}
7446
+ {"current_steps": 37135, "total_steps": 38150, "loss": 0.1886, "lr": 1.0794692731768419e-07, "epoch": 9.73394495412844, "percentage": 97.34, "elapsed_time": "1:02:34", "remaining_time": "0:01:42", "throughput": 2652.53, "total_tokens": 9959784}
7447
+ {"current_steps": 37140, "total_steps": 38150, "loss": 0.2485, "lr": 1.0688782779291473e-07, "epoch": 9.735255570117955, "percentage": 97.35, "elapsed_time": "1:02:35", "remaining_time": "0:01:42", "throughput": 2652.64, "total_tokens": 9961576}
7448
+ {"current_steps": 37145, "total_steps": 38150, "loss": 0.1229, "lr": 1.0583393835351396e-07, "epoch": 9.73656618610747, "percentage": 97.37, "elapsed_time": "1:02:35", "remaining_time": "0:01:41", "throughput": 2652.69, "total_tokens": 9963032}
7449
+ {"current_steps": 37150, "total_steps": 38150, "loss": 0.0875, "lr": 1.0478525922005545e-07, "epoch": 9.737876802096986, "percentage": 97.38, "elapsed_time": "1:02:36", "remaining_time": "0:01:41", "throughput": 2652.68, "total_tokens": 9964056}
7450
+ {"current_steps": 37155, "total_steps": 38150, "loss": 0.065, "lr": 1.0374179061202749e-07, "epoch": 9.7391874180865, "percentage": 97.39, "elapsed_time": "1:02:36", "remaining_time": "0:01:40", "throughput": 2652.71, "total_tokens": 9965368}
7451
+ {"current_steps": 37160, "total_steps": 38150, "loss": 0.1973, "lr": 1.0270353274782207e-07, "epoch": 9.740498034076015, "percentage": 97.4, "elapsed_time": "1:02:37", "remaining_time": "0:01:40", "throughput": 2652.73, "total_tokens": 9966680}
7452
+ {"current_steps": 37165, "total_steps": 38150, "loss": 0.1115, "lr": 1.0167048584475147e-07, "epoch": 9.741808650065531, "percentage": 97.42, "elapsed_time": "1:02:37", "remaining_time": "0:01:39", "throughput": 2652.73, "total_tokens": 9967864}
7453
+ {"current_steps": 37170, "total_steps": 38150, "loss": 0.1481, "lr": 1.006426501190233e-07, "epoch": 9.743119266055047, "percentage": 97.43, "elapsed_time": "1:02:38", "remaining_time": "0:01:39", "throughput": 2652.82, "total_tokens": 9969448}
7454
+ {"current_steps": 37175, "total_steps": 38150, "loss": 0.1174, "lr": 9.962002578576823e-08, "epoch": 9.74442988204456, "percentage": 97.44, "elapsed_time": "1:02:38", "remaining_time": "0:01:38", "throughput": 2652.84, "total_tokens": 9970712}
7455
+ {"current_steps": 37180, "total_steps": 38150, "loss": 0.1594, "lr": 9.860261305901785e-08, "epoch": 9.745740498034076, "percentage": 97.46, "elapsed_time": "1:02:38", "remaining_time": "0:01:38", "throughput": 2652.81, "total_tokens": 9971736}
7456
+ {"current_steps": 37185, "total_steps": 38150, "loss": 0.1186, "lr": 9.759041215171295e-08, "epoch": 9.747051114023591, "percentage": 97.47, "elapsed_time": "1:02:39", "remaining_time": "0:01:37", "throughput": 2652.81, "total_tokens": 9972872}
7457
+ {"current_steps": 37190, "total_steps": 38150, "loss": 0.2553, "lr": 9.658342327570902e-08, "epoch": 9.748361730013107, "percentage": 97.48, "elapsed_time": "1:02:39", "remaining_time": "0:01:37", "throughput": 2652.76, "total_tokens": 9973784}
7458
+ {"current_steps": 37195, "total_steps": 38150, "loss": 0.0937, "lr": 9.55816466417625e-08, "epoch": 9.74967234600262, "percentage": 97.5, "elapsed_time": "1:02:40", "remaining_time": "0:01:36", "throughput": 2652.74, "total_tokens": 9974840}
7459
+ {"current_steps": 37200, "total_steps": 38150, "loss": 0.1639, "lr": 9.458508245954456e-08, "epoch": 9.750982961992136, "percentage": 97.51, "elapsed_time": "1:02:40", "remaining_time": "0:01:36", "throughput": 2652.76, "total_tokens": 9976088}
7460
+ {"current_steps": 37205, "total_steps": 38150, "loss": 0.1298, "lr": 9.359373093763835e-08, "epoch": 9.752293577981652, "percentage": 97.52, "elapsed_time": "1:02:41", "remaining_time": "0:01:35", "throughput": 2652.81, "total_tokens": 9977432}
7461
+ {"current_steps": 37210, "total_steps": 38150, "loss": 0.1769, "lr": 9.260759228352789e-08, "epoch": 9.753604193971167, "percentage": 97.54, "elapsed_time": "1:02:41", "remaining_time": "0:01:35", "throughput": 2652.77, "total_tokens": 9978392}
7462
+ {"current_steps": 37215, "total_steps": 38150, "loss": 0.2187, "lr": 9.162666670361198e-08, "epoch": 9.754914809960681, "percentage": 97.55, "elapsed_time": "1:02:41", "remaining_time": "0:01:34", "throughput": 2652.87, "total_tokens": 9980024}
7463
+ {"current_steps": 37220, "total_steps": 38150, "loss": 0.1725, "lr": 9.065095440319582e-08, "epoch": 9.756225425950197, "percentage": 97.56, "elapsed_time": "1:02:42", "remaining_time": "0:01:34", "throughput": 2652.91, "total_tokens": 9981416}
7464
+ {"current_steps": 37225, "total_steps": 38150, "loss": 0.0916, "lr": 8.968045558649663e-08, "epoch": 9.757536041939712, "percentage": 97.58, "elapsed_time": "1:02:42", "remaining_time": "0:01:33", "throughput": 2652.94, "total_tokens": 9982664}
7465
+ {"current_steps": 37230, "total_steps": 38150, "loss": 0.096, "lr": 8.871517045663524e-08, "epoch": 9.758846657929226, "percentage": 97.59, "elapsed_time": "1:02:43", "remaining_time": "0:01:32", "throughput": 2652.99, "total_tokens": 9984040}
7466
+ {"current_steps": 37235, "total_steps": 38150, "loss": 0.1811, "lr": 8.775509921564728e-08, "epoch": 9.760157273918741, "percentage": 97.6, "elapsed_time": "1:02:43", "remaining_time": "0:01:32", "throughput": 2653.02, "total_tokens": 9985400}
7467
+ {"current_steps": 37240, "total_steps": 38150, "loss": 0.26, "lr": 8.680024206446924e-08, "epoch": 9.761467889908257, "percentage": 97.61, "elapsed_time": "1:02:44", "remaining_time": "0:01:31", "throughput": 2653.03, "total_tokens": 9986584}
7468
+ {"current_steps": 37245, "total_steps": 38150, "loss": 0.2173, "lr": 8.585059920295512e-08, "epoch": 9.762778505897773, "percentage": 97.63, "elapsed_time": "1:02:44", "remaining_time": "0:01:31", "throughput": 2653.1, "total_tokens": 9988088}
7469
+ {"current_steps": 37250, "total_steps": 38150, "loss": 0.2054, "lr": 8.490617082985986e-08, "epoch": 9.764089121887286, "percentage": 97.64, "elapsed_time": "1:02:45", "remaining_time": "0:01:30", "throughput": 2653.09, "total_tokens": 9989192}
7470
+ {"current_steps": 37255, "total_steps": 38150, "loss": 0.1346, "lr": 8.396695714285585e-08, "epoch": 9.765399737876802, "percentage": 97.65, "elapsed_time": "1:02:45", "remaining_time": "0:01:30", "throughput": 2653.11, "total_tokens": 9990392}
7471
+ {"current_steps": 37260, "total_steps": 38150, "loss": 0.1504, "lr": 8.303295833851365e-08, "epoch": 9.766710353866317, "percentage": 97.67, "elapsed_time": "1:02:45", "remaining_time": "0:01:29", "throughput": 2653.13, "total_tokens": 9991608}
7472
+ {"current_steps": 37265, "total_steps": 38150, "loss": 0.1396, "lr": 8.210417461232412e-08, "epoch": 9.768020969855833, "percentage": 97.68, "elapsed_time": "1:02:46", "remaining_time": "0:01:29", "throughput": 2653.16, "total_tokens": 9992968}
7473
+ {"current_steps": 37270, "total_steps": 38150, "loss": 0.1273, "lr": 8.11806061586734e-08, "epoch": 9.769331585845347, "percentage": 97.69, "elapsed_time": "1:02:46", "remaining_time": "0:01:28", "throughput": 2653.2, "total_tokens": 9994280}
7474
+ {"current_steps": 37275, "total_steps": 38150, "loss": 0.1413, "lr": 8.026225317086522e-08, "epoch": 9.770642201834862, "percentage": 97.71, "elapsed_time": "1:02:47", "remaining_time": "0:01:28", "throughput": 2653.25, "total_tokens": 9995736}
7475
+ {"current_steps": 37280, "total_steps": 38150, "loss": 0.1667, "lr": 7.934911584110971e-08, "epoch": 9.771952817824378, "percentage": 97.72, "elapsed_time": "1:02:47", "remaining_time": "0:01:27", "throughput": 2653.3, "total_tokens": 9997064}
7476
+ {"current_steps": 37285, "total_steps": 38150, "loss": 0.1005, "lr": 7.844119436052622e-08, "epoch": 9.773263433813893, "percentage": 97.73, "elapsed_time": "1:02:48", "remaining_time": "0:01:27", "throughput": 2653.27, "total_tokens": 9998072}
7477
+ {"current_steps": 37290, "total_steps": 38150, "loss": 0.1651, "lr": 7.753848891913772e-08, "epoch": 9.774574049803407, "percentage": 97.75, "elapsed_time": "1:02:48", "remaining_time": "0:01:26", "throughput": 2653.32, "total_tokens": 9999416}
7478
+ {"current_steps": 37295, "total_steps": 38150, "loss": 0.1225, "lr": 7.664099970588201e-08, "epoch": 9.775884665792923, "percentage": 97.76, "elapsed_time": "1:02:49", "remaining_time": "0:01:26", "throughput": 2653.33, "total_tokens": 10000648}
7479
+ {"current_steps": 37300, "total_steps": 38150, "loss": 0.0339, "lr": 7.57487269085977e-08, "epoch": 9.777195281782438, "percentage": 97.77, "elapsed_time": "1:02:49", "remaining_time": "0:01:25", "throughput": 2653.27, "total_tokens": 10001544}
7480
+ {"current_steps": 37305, "total_steps": 38150, "loss": 0.0857, "lr": 7.486167071404371e-08, "epoch": 9.778505897771954, "percentage": 97.79, "elapsed_time": "1:02:49", "remaining_time": "0:01:25", "throughput": 2653.27, "total_tokens": 10002648}
7481
+ {"current_steps": 37310, "total_steps": 38150, "loss": 0.1473, "lr": 7.397983130787156e-08, "epoch": 9.779816513761467, "percentage": 97.8, "elapsed_time": "1:02:50", "remaining_time": "0:01:24", "throughput": 2653.3, "total_tokens": 10004024}
7482
+ {"current_steps": 37315, "total_steps": 38150, "loss": 0.1754, "lr": 7.310320887465305e-08, "epoch": 9.781127129750983, "percentage": 97.81, "elapsed_time": "1:02:50", "remaining_time": "0:01:24", "throughput": 2653.32, "total_tokens": 10005288}
7483
+ {"current_steps": 37320, "total_steps": 38150, "loss": 0.1704, "lr": 7.223180359786086e-08, "epoch": 9.782437745740499, "percentage": 97.82, "elapsed_time": "1:02:51", "remaining_time": "0:01:23", "throughput": 2653.36, "total_tokens": 10006600}
7484
+ {"current_steps": 37325, "total_steps": 38150, "loss": 0.1791, "lr": 7.136561565988242e-08, "epoch": 9.783748361730012, "percentage": 97.84, "elapsed_time": "1:02:51", "remaining_time": "0:01:23", "throughput": 2653.47, "total_tokens": 10008328}
7485
+ {"current_steps": 37330, "total_steps": 38150, "loss": 0.1542, "lr": 7.050464524200607e-08, "epoch": 9.785058977719528, "percentage": 97.85, "elapsed_time": "1:02:52", "remaining_time": "0:01:22", "throughput": 2653.49, "total_tokens": 10009528}
7486
+ {"current_steps": 37335, "total_steps": 38150, "loss": 0.1194, "lr": 6.964889252443485e-08, "epoch": 9.786369593709043, "percentage": 97.86, "elapsed_time": "1:02:52", "remaining_time": "0:01:22", "throughput": 2653.53, "total_tokens": 10010920}
7487
+ {"current_steps": 37340, "total_steps": 38150, "loss": 0.2579, "lr": 6.879835768627274e-08, "epoch": 9.787680209698559, "percentage": 97.88, "elapsed_time": "1:02:53", "remaining_time": "0:01:21", "throughput": 2653.63, "total_tokens": 10012584}
7488
+ {"current_steps": 37345, "total_steps": 38150, "loss": 0.1656, "lr": 6.795304090553567e-08, "epoch": 9.788990825688073, "percentage": 97.89, "elapsed_time": "1:02:53", "remaining_time": "0:01:21", "throughput": 2653.67, "total_tokens": 10013864}
7489
+ {"current_steps": 37350, "total_steps": 38150, "loss": 0.2039, "lr": 6.711294235914877e-08, "epoch": 9.790301441677588, "percentage": 97.9, "elapsed_time": "1:02:54", "remaining_time": "0:01:20", "throughput": 2653.81, "total_tokens": 10016008}
7490
+ {"current_steps": 37355, "total_steps": 38150, "loss": 0.1005, "lr": 6.627806222294086e-08, "epoch": 9.791612057667104, "percentage": 97.92, "elapsed_time": "1:02:54", "remaining_time": "0:01:20", "throughput": 2653.83, "total_tokens": 10017176}
7491
+ {"current_steps": 37360, "total_steps": 38150, "loss": 0.1054, "lr": 6.544840067165548e-08, "epoch": 9.79292267365662, "percentage": 97.93, "elapsed_time": "1:02:55", "remaining_time": "0:01:19", "throughput": 2653.88, "total_tokens": 10018584}
7492
+ {"current_steps": 37365, "total_steps": 38150, "loss": 0.1791, "lr": 6.462395787893427e-08, "epoch": 9.794233289646133, "percentage": 97.94, "elapsed_time": "1:02:55", "remaining_time": "0:01:19", "throughput": 2653.98, "total_tokens": 10020392}
7493
+ {"current_steps": 37370, "total_steps": 38150, "loss": 0.1947, "lr": 6.380473401733366e-08, "epoch": 9.795543905635649, "percentage": 97.96, "elapsed_time": "1:02:56", "remaining_time": "0:01:18", "throughput": 2654.12, "total_tokens": 10022424}
7494
+ {"current_steps": 37375, "total_steps": 38150, "loss": 0.1129, "lr": 6.299072925831373e-08, "epoch": 9.796854521625164, "percentage": 97.97, "elapsed_time": "1:02:56", "remaining_time": "0:01:18", "throughput": 2654.25, "total_tokens": 10024296}
7495
+ {"current_steps": 37380, "total_steps": 38150, "loss": 0.1901, "lr": 6.218194377224928e-08, "epoch": 9.79816513761468, "percentage": 97.98, "elapsed_time": "1:02:57", "remaining_time": "0:01:17", "throughput": 2654.23, "total_tokens": 10025304}
7496
+ {"current_steps": 37385, "total_steps": 38150, "loss": 0.0631, "lr": 6.137837772841326e-08, "epoch": 9.799475753604193, "percentage": 97.99, "elapsed_time": "1:02:57", "remaining_time": "0:01:17", "throughput": 2654.2, "total_tokens": 10026344}
7497
+ {"current_steps": 37390, "total_steps": 38150, "loss": 0.1662, "lr": 6.058003129499334e-08, "epoch": 9.800786369593709, "percentage": 98.01, "elapsed_time": "1:02:57", "remaining_time": "0:01:16", "throughput": 2654.21, "total_tokens": 10027464}
7498
+ {"current_steps": 37395, "total_steps": 38150, "loss": 0.1315, "lr": 5.978690463908088e-08, "epoch": 9.802096985583225, "percentage": 98.02, "elapsed_time": "1:02:58", "remaining_time": "0:01:16", "throughput": 2654.23, "total_tokens": 10028680}
7499
+ {"current_steps": 37400, "total_steps": 38150, "loss": 0.1931, "lr": 5.8998997926676405e-08, "epoch": 9.80340760157274, "percentage": 98.03, "elapsed_time": "1:02:58", "remaining_time": "0:01:15", "throughput": 2654.25, "total_tokens": 10029928}
7500
+ {"current_steps": 37405, "total_steps": 38150, "loss": 0.1339, "lr": 5.821631132268412e-08, "epoch": 9.804718217562254, "percentage": 98.05, "elapsed_time": "1:02:59", "remaining_time": "0:01:15", "throughput": 2654.27, "total_tokens": 10031208}
7501
+ {"current_steps": 37410, "total_steps": 38150, "loss": 0.1047, "lr": 5.743884499092578e-08, "epoch": 9.80602883355177, "percentage": 98.06, "elapsed_time": "1:02:59", "remaining_time": "0:01:14", "throughput": 2654.23, "total_tokens": 10032136}
7502
+ {"current_steps": 37415, "total_steps": 38150, "loss": 0.1277, "lr": 5.6666599094115646e-08, "epoch": 9.807339449541285, "percentage": 98.07, "elapsed_time": "1:03:00", "remaining_time": "0:01:14", "throughput": 2654.21, "total_tokens": 10033176}
7503
+ {"current_steps": 37420, "total_steps": 38150, "loss": 0.1038, "lr": 5.58995737938911e-08, "epoch": 9.808650065530799, "percentage": 98.09, "elapsed_time": "1:03:00", "remaining_time": "0:01:13", "throughput": 2654.2, "total_tokens": 10034280}
7504
+ {"current_steps": 37425, "total_steps": 38150, "loss": 0.1245, "lr": 5.513776925078207e-08, "epoch": 9.809960681520314, "percentage": 98.1, "elapsed_time": "1:03:01", "remaining_time": "0:01:13", "throughput": 2654.31, "total_tokens": 10035960}
7505
+ {"current_steps": 37430, "total_steps": 38150, "loss": 0.07, "lr": 5.4381185624238776e-08, "epoch": 9.81127129750983, "percentage": 98.11, "elapsed_time": "1:03:01", "remaining_time": "0:01:12", "throughput": 2654.29, "total_tokens": 10036984}
7506
+ {"current_steps": 37435, "total_steps": 38150, "loss": 0.1575, "lr": 5.362982307261233e-08, "epoch": 9.812581913499345, "percentage": 98.13, "elapsed_time": "1:03:01", "remaining_time": "0:01:12", "throughput": 2654.26, "total_tokens": 10038008}
7507
+ {"current_steps": 37440, "total_steps": 38150, "loss": 0.1566, "lr": 5.2883681753157497e-08, "epoch": 9.813892529488859, "percentage": 98.14, "elapsed_time": "1:03:02", "remaining_time": "0:01:11", "throughput": 2654.24, "total_tokens": 10039112}
7508
+ {"current_steps": 37445, "total_steps": 38150, "loss": 0.0962, "lr": 5.214276182204381e-08, "epoch": 9.815203145478375, "percentage": 98.15, "elapsed_time": "1:03:02", "remaining_time": "0:01:11", "throughput": 2654.23, "total_tokens": 10040216}
7509
+ {"current_steps": 37450, "total_steps": 38150, "loss": 0.1576, "lr": 5.140706343434165e-08, "epoch": 9.81651376146789, "percentage": 98.17, "elapsed_time": "1:03:03", "remaining_time": "0:01:10", "throughput": 2654.26, "total_tokens": 10041528}
7510
+ {"current_steps": 37455, "total_steps": 38150, "loss": 0.1097, "lr": 5.0676586744036194e-08, "epoch": 9.817824377457406, "percentage": 98.18, "elapsed_time": "1:03:03", "remaining_time": "0:01:10", "throughput": 2654.25, "total_tokens": 10042600}
7511
+ {"current_steps": 37460, "total_steps": 38150, "loss": 0.1815, "lr": 4.9951331904007915e-08, "epoch": 9.81913499344692, "percentage": 98.19, "elapsed_time": "1:03:04", "remaining_time": "0:01:09", "throughput": 2654.29, "total_tokens": 10043960}
7512
+ {"current_steps": 37465, "total_steps": 38150, "loss": 0.1454, "lr": 4.923129906606039e-08, "epoch": 9.820445609436435, "percentage": 98.2, "elapsed_time": "1:03:04", "remaining_time": "0:01:09", "throughput": 2654.29, "total_tokens": 10045080}
7513
+ {"current_steps": 37470, "total_steps": 38150, "loss": 0.1158, "lr": 4.851648838088696e-08, "epoch": 9.82175622542595, "percentage": 98.22, "elapsed_time": "1:03:04", "remaining_time": "0:01:08", "throughput": 2654.35, "total_tokens": 10046440}
7514
+ {"current_steps": 37475, "total_steps": 38150, "loss": 0.1061, "lr": 4.7806899998101283e-08, "epoch": 9.823066841415466, "percentage": 98.23, "elapsed_time": "1:03:05", "remaining_time": "0:01:08", "throughput": 2654.31, "total_tokens": 10047416}
7515
+ {"current_steps": 37480, "total_steps": 38150, "loss": 0.1268, "lr": 4.71025340662179e-08, "epoch": 9.82437745740498, "percentage": 98.24, "elapsed_time": "1:03:05", "remaining_time": "0:01:07", "throughput": 2654.33, "total_tokens": 10048648}
7516
+ {"current_steps": 37485, "total_steps": 38150, "loss": 0.1763, "lr": 4.6403390732654985e-08, "epoch": 9.825688073394495, "percentage": 98.26, "elapsed_time": "1:03:06", "remaining_time": "0:01:07", "throughput": 2654.36, "total_tokens": 10050008}
7517
+ {"current_steps": 37490, "total_steps": 38150, "loss": 0.1758, "lr": 4.570947014374827e-08, "epoch": 9.82699868938401, "percentage": 98.27, "elapsed_time": "1:03:06", "remaining_time": "0:01:06", "throughput": 2654.47, "total_tokens": 10051752}
7518
+ {"current_steps": 37495, "total_steps": 38150, "loss": 0.1611, "lr": 4.502077244473435e-08, "epoch": 9.828309305373526, "percentage": 98.28, "elapsed_time": "1:03:07", "remaining_time": "0:01:06", "throughput": 2654.49, "total_tokens": 10053032}
7519
+ {"current_steps": 37500, "total_steps": 38150, "loss": 0.0778, "lr": 4.4337297779750705e-08, "epoch": 9.82961992136304, "percentage": 98.3, "elapsed_time": "1:03:07", "remaining_time": "0:01:05", "throughput": 2654.48, "total_tokens": 10054104}
7520
+ {"current_steps": 37505, "total_steps": 38150, "loss": 0.0905, "lr": 4.365904629185236e-08, "epoch": 9.830930537352556, "percentage": 98.31, "elapsed_time": "1:03:08", "remaining_time": "0:01:05", "throughput": 2654.47, "total_tokens": 10055288}
7521
+ {"current_steps": 37510, "total_steps": 38150, "loss": 0.1889, "lr": 4.298601812299241e-08, "epoch": 9.832241153342071, "percentage": 98.32, "elapsed_time": "1:03:08", "remaining_time": "0:01:04", "throughput": 2654.55, "total_tokens": 10056808}
7522
+ {"current_steps": 37515, "total_steps": 38150, "loss": 0.1385, "lr": 4.2318213414038745e-08, "epoch": 9.833551769331585, "percentage": 98.34, "elapsed_time": "1:03:08", "remaining_time": "0:01:04", "throughput": 2654.56, "total_tokens": 10057960}
7523
+ {"current_steps": 37520, "total_steps": 38150, "loss": 0.1577, "lr": 4.1655632304757334e-08, "epoch": 9.8348623853211, "percentage": 98.35, "elapsed_time": "1:03:09", "remaining_time": "0:01:03", "throughput": 2654.62, "total_tokens": 10059544}
7524
+ {"current_steps": 37525, "total_steps": 38150, "loss": 0.0989, "lr": 4.0998274933828905e-08, "epoch": 9.836173001310616, "percentage": 98.36, "elapsed_time": "1:03:09", "remaining_time": "0:01:03", "throughput": 2654.63, "total_tokens": 10060680}
7525
+ {"current_steps": 37530, "total_steps": 38150, "loss": 0.1692, "lr": 4.034614143883508e-08, "epoch": 9.837483617300132, "percentage": 98.37, "elapsed_time": "1:03:10", "remaining_time": "0:01:02", "throughput": 2654.64, "total_tokens": 10061848}
7526
+ {"current_steps": 37535, "total_steps": 38150, "loss": 0.1286, "lr": 3.969923195626668e-08, "epoch": 9.838794233289645, "percentage": 98.39, "elapsed_time": "1:03:10", "remaining_time": "0:01:02", "throughput": 2654.69, "total_tokens": 10063224}
7527
+ {"current_steps": 37540, "total_steps": 38150, "loss": 0.1245, "lr": 3.9057546621520946e-08, "epoch": 9.840104849279161, "percentage": 98.4, "elapsed_time": "1:03:11", "remaining_time": "0:01:01", "throughput": 2654.74, "total_tokens": 10064728}
7528
+ {"current_steps": 37545, "total_steps": 38150, "loss": 0.062, "lr": 3.842108556890156e-08, "epoch": 9.841415465268676, "percentage": 98.41, "elapsed_time": "1:03:11", "remaining_time": "0:01:01", "throughput": 2654.74, "total_tokens": 10065832}
7529
+ {"current_steps": 37550, "total_steps": 38150, "loss": 0.1085, "lr": 3.778984893161863e-08, "epoch": 9.842726081258192, "percentage": 98.43, "elapsed_time": "1:03:12", "remaining_time": "0:01:00", "throughput": 2654.77, "total_tokens": 10067112}
7530
+ {"current_steps": 37555, "total_steps": 38150, "loss": 0.2165, "lr": 3.7163836841791476e-08, "epoch": 9.844036697247706, "percentage": 98.44, "elapsed_time": "1:03:12", "remaining_time": "0:01:00", "throughput": 2654.76, "total_tokens": 10068264}
7531
+ {"current_steps": 37560, "total_steps": 38150, "loss": 0.173, "lr": 3.654304943043752e-08, "epoch": 9.845347313237221, "percentage": 98.45, "elapsed_time": "1:03:12", "remaining_time": "0:00:59", "throughput": 2654.77, "total_tokens": 10069448}
7532
+ {"current_steps": 37565, "total_steps": 38150, "loss": 0.2022, "lr": 3.5927486827491696e-08, "epoch": 9.846657929226737, "percentage": 98.47, "elapsed_time": "1:03:13", "remaining_time": "0:00:59", "throughput": 2654.82, "total_tokens": 10070776}
7533
+ {"current_steps": 37570, "total_steps": 38150, "loss": 0.0696, "lr": 3.531714916178708e-08, "epoch": 9.847968545216252, "percentage": 98.48, "elapsed_time": "1:03:13", "remaining_time": "0:00:58", "throughput": 2654.83, "total_tokens": 10071928}
7534
+ {"current_steps": 37575, "total_steps": 38150, "loss": 0.1198, "lr": 3.4712036561068693e-08, "epoch": 9.849279161205766, "percentage": 98.49, "elapsed_time": "1:03:14", "remaining_time": "0:00:58", "throughput": 2654.79, "total_tokens": 10072888}
7535
+ {"current_steps": 37580, "total_steps": 38150, "loss": 0.058, "lr": 3.4112149151982466e-08, "epoch": 9.850589777195282, "percentage": 98.51, "elapsed_time": "1:03:14", "remaining_time": "0:00:57", "throughput": 2654.78, "total_tokens": 10074008}
7536
+ {"current_steps": 37585, "total_steps": 38150, "loss": 0.1727, "lr": 3.351748706008628e-08, "epoch": 9.851900393184797, "percentage": 98.52, "elapsed_time": "1:03:15", "remaining_time": "0:00:57", "throughput": 2654.85, "total_tokens": 10075544}
7537
+ {"current_steps": 37590, "total_steps": 38150, "loss": 0.2012, "lr": 3.292805040984171e-08, "epoch": 9.853211009174313, "percentage": 98.53, "elapsed_time": "1:03:15", "remaining_time": "0:00:56", "throughput": 2654.89, "total_tokens": 10076920}
7538
+ {"current_steps": 37595, "total_steps": 38150, "loss": 0.1669, "lr": 3.234383932461671e-08, "epoch": 9.854521625163827, "percentage": 98.55, "elapsed_time": "1:03:16", "remaining_time": "0:00:56", "throughput": 2654.96, "total_tokens": 10078456}
7539
+ {"current_steps": 37600, "total_steps": 38150, "loss": 0.1817, "lr": 3.176485392668571e-08, "epoch": 9.855832241153342, "percentage": 98.56, "elapsed_time": "1:03:16", "remaining_time": "0:00:55", "throughput": 2655.03, "total_tokens": 10079896}
7540
+ {"current_steps": 37605, "total_steps": 38150, "loss": 0.1492, "lr": 3.119109433722955e-08, "epoch": 9.857142857142858, "percentage": 98.57, "elapsed_time": "1:03:17", "remaining_time": "0:00:55", "throughput": 2655.11, "total_tokens": 10081512}
7541
+ {"current_steps": 37610, "total_steps": 38150, "loss": 0.333, "lr": 3.0622560676332734e-08, "epoch": 9.858453473132371, "percentage": 98.58, "elapsed_time": "1:03:17", "remaining_time": "0:00:54", "throughput": 2655.19, "total_tokens": 10083096}
7542
+ {"current_steps": 37615, "total_steps": 38150, "loss": 0.163, "lr": 3.005925306299173e-08, "epoch": 9.859764089121887, "percentage": 98.6, "elapsed_time": "1:03:18", "remaining_time": "0:00:54", "throughput": 2655.25, "total_tokens": 10084632}
7543
+ {"current_steps": 37620, "total_steps": 38150, "loss": 0.1332, "lr": 2.9501171615103907e-08, "epoch": 9.861074705111402, "percentage": 98.61, "elapsed_time": "1:03:18", "remaining_time": "0:00:53", "throughput": 2655.31, "total_tokens": 10086152}
7544
+ {"current_steps": 37625, "total_steps": 38150, "loss": 0.1895, "lr": 2.8948316449473044e-08, "epoch": 9.862385321100918, "percentage": 98.62, "elapsed_time": "1:03:18", "remaining_time": "0:00:53", "throughput": 2655.4, "total_tokens": 10087864}
7545
+ {"current_steps": 37630, "total_steps": 38150, "loss": 0.186, "lr": 2.840068768181492e-08, "epoch": 9.863695937090432, "percentage": 98.64, "elapsed_time": "1:03:19", "remaining_time": "0:00:52", "throughput": 2655.45, "total_tokens": 10089288}
7546
+ {"current_steps": 37635, "total_steps": 38150, "loss": 0.1775, "lr": 2.785828542674618e-08, "epoch": 9.865006553079947, "percentage": 98.65, "elapsed_time": "1:03:19", "remaining_time": "0:00:51", "throughput": 2655.49, "total_tokens": 10090552}
7547
+ {"current_steps": 37640, "total_steps": 38150, "loss": 0.1587, "lr": 2.7321109797787125e-08, "epoch": 9.866317169069463, "percentage": 98.66, "elapsed_time": "1:03:20", "remaining_time": "0:00:51", "throughput": 2655.51, "total_tokens": 10091944}
7548
+ {"current_steps": 37645, "total_steps": 38150, "loss": 0.1536, "lr": 2.6789160907372822e-08, "epoch": 9.867627785058978, "percentage": 98.68, "elapsed_time": "1:03:20", "remaining_time": "0:00:50", "throughput": 2655.57, "total_tokens": 10093400}
7549
+ {"current_steps": 37650, "total_steps": 38150, "loss": 0.1645, "lr": 2.626243886683366e-08, "epoch": 9.868938401048492, "percentage": 98.69, "elapsed_time": "1:03:21", "remaining_time": "0:00:50", "throughput": 2655.58, "total_tokens": 10094536}
7550
+ {"current_steps": 37655, "total_steps": 38150, "loss": 0.1396, "lr": 2.574094378641756e-08, "epoch": 9.870249017038008, "percentage": 98.7, "elapsed_time": "1:03:21", "remaining_time": "0:00:49", "throughput": 2655.59, "total_tokens": 10095736}
7551
+ {"current_steps": 37660, "total_steps": 38150, "loss": 0.2736, "lr": 2.5224675775270544e-08, "epoch": 9.871559633027523, "percentage": 98.72, "elapsed_time": "1:03:22", "remaining_time": "0:00:49", "throughput": 2655.6, "total_tokens": 10096888}
7552
+ {"current_steps": 37665, "total_steps": 38150, "loss": 0.0679, "lr": 2.4713634941442298e-08, "epoch": 9.872870249017039, "percentage": 98.73, "elapsed_time": "1:03:22", "remaining_time": "0:00:48", "throughput": 2655.61, "total_tokens": 10098072}
7553
+ {"current_steps": 37670, "total_steps": 38150, "loss": 0.2254, "lr": 2.4207821391900032e-08, "epoch": 9.874180865006553, "percentage": 98.74, "elapsed_time": "1:03:23", "remaining_time": "0:00:48", "throughput": 2655.71, "total_tokens": 10099832}
7554
+ {"current_steps": 37675, "total_steps": 38150, "loss": 0.0752, "lr": 2.370723523250351e-08, "epoch": 9.875491480996068, "percentage": 98.75, "elapsed_time": "1:03:23", "remaining_time": "0:00:47", "throughput": 2655.79, "total_tokens": 10101352}
7555
+ {"current_steps": 37680, "total_steps": 38150, "loss": 0.1054, "lr": 2.321187656802726e-08, "epoch": 9.876802096985584, "percentage": 98.77, "elapsed_time": "1:03:23", "remaining_time": "0:00:47", "throughput": 2655.82, "total_tokens": 10102632}
7556
+ {"current_steps": 37685, "total_steps": 38150, "loss": 0.1462, "lr": 2.272174550214945e-08, "epoch": 9.8781127129751, "percentage": 98.78, "elapsed_time": "1:03:24", "remaining_time": "0:00:46", "throughput": 2655.8, "total_tokens": 10103688}
7557
+ {"current_steps": 37690, "total_steps": 38150, "loss": 0.0636, "lr": 2.223684213745192e-08, "epoch": 9.879423328964613, "percentage": 98.79, "elapsed_time": "1:03:24", "remaining_time": "0:00:46", "throughput": 2655.82, "total_tokens": 10104904}
7558
+ {"current_steps": 37695, "total_steps": 38150, "loss": 0.1645, "lr": 2.1757166575425702e-08, "epoch": 9.880733944954128, "percentage": 98.81, "elapsed_time": "1:03:25", "remaining_time": "0:00:45", "throughput": 2655.87, "total_tokens": 10106328}
7559
+ {"current_steps": 37700, "total_steps": 38150, "loss": 0.0896, "lr": 2.1282718916465494e-08, "epoch": 9.882044560943644, "percentage": 98.82, "elapsed_time": "1:03:25", "remaining_time": "0:00:45", "throughput": 2655.87, "total_tokens": 10107496}
7560
+ {"current_steps": 37705, "total_steps": 38150, "loss": 0.1148, "lr": 2.0813499259872414e-08, "epoch": 9.883355176933158, "percentage": 98.83, "elapsed_time": "1:03:26", "remaining_time": "0:00:44", "throughput": 2655.86, "total_tokens": 10108632}
7561
+ {"current_steps": 37710, "total_steps": 38150, "loss": 0.1435, "lr": 2.0349507703851244e-08, "epoch": 9.884665792922673, "percentage": 98.85, "elapsed_time": "1:03:26", "remaining_time": "0:00:44", "throughput": 2655.91, "total_tokens": 10110008}
7562
+ {"current_steps": 37715, "total_steps": 38150, "loss": 0.174, "lr": 1.9890744345518742e-08, "epoch": 9.885976408912189, "percentage": 98.86, "elapsed_time": "1:03:27", "remaining_time": "0:00:43", "throughput": 2655.94, "total_tokens": 10111272}
7563
+ {"current_steps": 37720, "total_steps": 38150, "loss": 0.1573, "lr": 1.9437209280889768e-08, "epoch": 9.887287024901704, "percentage": 98.87, "elapsed_time": "1:03:27", "remaining_time": "0:00:43", "throughput": 2656.04, "total_tokens": 10112920}
7564
+ {"current_steps": 37725, "total_steps": 38150, "loss": 0.3003, "lr": 1.8988902604891166e-08, "epoch": 9.888597640891218, "percentage": 98.89, "elapsed_time": "1:03:28", "remaining_time": "0:00:42", "throughput": 2656.16, "total_tokens": 10114760}
7565
+ {"current_steps": 37730, "total_steps": 38150, "loss": 0.2854, "lr": 1.8545824411350665e-08, "epoch": 9.889908256880734, "percentage": 98.9, "elapsed_time": "1:03:28", "remaining_time": "0:00:42", "throughput": 2656.21, "total_tokens": 10116120}
7566
+ {"current_steps": 37735, "total_steps": 38150, "loss": 0.1543, "lr": 1.810797479300519e-08, "epoch": 9.89121887287025, "percentage": 98.91, "elapsed_time": "1:03:28", "remaining_time": "0:00:41", "throughput": 2656.3, "total_tokens": 10117784}
7567
+ {"current_steps": 37740, "total_steps": 38150, "loss": 0.1859, "lr": 1.7675353841495325e-08, "epoch": 9.892529488859765, "percentage": 98.93, "elapsed_time": "1:03:29", "remaining_time": "0:00:41", "throughput": 2656.33, "total_tokens": 10119112}
7568
+ {"current_steps": 37745, "total_steps": 38150, "loss": 0.2088, "lr": 1.7247961647368082e-08, "epoch": 9.893840104849279, "percentage": 98.94, "elapsed_time": "1:03:29", "remaining_time": "0:00:40", "throughput": 2656.36, "total_tokens": 10120424}
7569
+ {"current_steps": 37750, "total_steps": 38150, "loss": 0.102, "lr": 1.6825798300074137e-08, "epoch": 9.895150720838794, "percentage": 98.95, "elapsed_time": "1:03:30", "remaining_time": "0:00:40", "throughput": 2656.35, "total_tokens": 10121592}
7570
+ {"current_steps": 37755, "total_steps": 38150, "loss": 0.0713, "lr": 1.640886388797336e-08, "epoch": 9.89646133682831, "percentage": 98.96, "elapsed_time": "1:03:30", "remaining_time": "0:00:39", "throughput": 2656.32, "total_tokens": 10122600}
7571
+ {"current_steps": 37760, "total_steps": 38150, "loss": 0.1229, "lr": 1.5997158498329277e-08, "epoch": 9.897771952817825, "percentage": 98.98, "elapsed_time": "1:03:31", "remaining_time": "0:00:39", "throughput": 2656.34, "total_tokens": 10123880}
7572
+ {"current_steps": 37765, "total_steps": 38150, "loss": 0.0939, "lr": 1.559068221731186e-08, "epoch": 9.899082568807339, "percentage": 98.99, "elapsed_time": "1:03:31", "remaining_time": "0:00:38", "throughput": 2656.36, "total_tokens": 10125112}
7573
+ {"current_steps": 37770, "total_steps": 38150, "loss": 0.1205, "lr": 1.518943512999471e-08, "epoch": 9.900393184796854, "percentage": 99.0, "elapsed_time": "1:03:32", "remaining_time": "0:00:38", "throughput": 2656.35, "total_tokens": 10126280}
7574
+ {"current_steps": 37775, "total_steps": 38150, "loss": 0.1311, "lr": 1.4793417320357882e-08, "epoch": 9.90170380078637, "percentage": 99.02, "elapsed_time": "1:03:32", "remaining_time": "0:00:37", "throughput": 2656.45, "total_tokens": 10128024}
7575
+ {"current_steps": 37780, "total_steps": 38150, "loss": 0.1458, "lr": 1.4402628871285073e-08, "epoch": 9.903014416775886, "percentage": 99.03, "elapsed_time": "1:03:33", "remaining_time": "0:00:37", "throughput": 2656.52, "total_tokens": 10129528}
7576
+ {"current_steps": 37785, "total_steps": 38150, "loss": 0.1462, "lr": 1.401706986457474e-08, "epoch": 9.9043250327654, "percentage": 99.04, "elapsed_time": "1:03:33", "remaining_time": "0:00:36", "throughput": 2656.61, "total_tokens": 10131192}
7577
+ {"current_steps": 37790, "total_steps": 38150, "loss": 0.1805, "lr": 1.3636740380915113e-08, "epoch": 9.905635648754915, "percentage": 99.06, "elapsed_time": "1:03:34", "remaining_time": "0:00:36", "throughput": 2656.62, "total_tokens": 10132424}
7578
+ {"current_steps": 37795, "total_steps": 38150, "loss": 0.1156, "lr": 1.326164049991474e-08, "epoch": 9.90694626474443, "percentage": 99.07, "elapsed_time": "1:03:34", "remaining_time": "0:00:35", "throughput": 2656.67, "total_tokens": 10133816}
7579
+ {"current_steps": 37800, "total_steps": 38150, "loss": 0.2247, "lr": 1.2891770300080263e-08, "epoch": 9.908256880733944, "percentage": 99.08, "elapsed_time": "1:03:34", "remaining_time": "0:00:35", "throughput": 2656.72, "total_tokens": 10135176}
7580
+ {"current_steps": 37805, "total_steps": 38150, "loss": 0.154, "lr": 1.2527129858821985e-08, "epoch": 9.90956749672346, "percentage": 99.1, "elapsed_time": "1:03:35", "remaining_time": "0:00:34", "throughput": 2656.74, "total_tokens": 10136376}
7581
+ {"current_steps": 37810, "total_steps": 38150, "loss": 0.2345, "lr": 1.2167719252462183e-08, "epoch": 9.910878112712975, "percentage": 99.11, "elapsed_time": "1:03:35", "remaining_time": "0:00:34", "throughput": 2656.9, "total_tokens": 10138408}
7582
+ {"current_steps": 37815, "total_steps": 38150, "loss": 0.1353, "lr": 1.1813538556221249e-08, "epoch": 9.91218872870249, "percentage": 99.12, "elapsed_time": "1:03:36", "remaining_time": "0:00:33", "throughput": 2656.92, "total_tokens": 10139672}
7583
+ {"current_steps": 37820, "total_steps": 38150, "loss": 0.2638, "lr": 1.1464587844231544e-08, "epoch": 9.913499344692005, "percentage": 99.13, "elapsed_time": "1:03:36", "remaining_time": "0:00:33", "throughput": 2657.03, "total_tokens": 10141624}
7584
+ {"current_steps": 37825, "total_steps": 38150, "loss": 0.1724, "lr": 1.1120867189526318e-08, "epoch": 9.91480996068152, "percentage": 99.15, "elapsed_time": "1:03:37", "remaining_time": "0:00:32", "throughput": 2657.04, "total_tokens": 10142920}
7585
+ {"current_steps": 37830, "total_steps": 38150, "loss": 0.1568, "lr": 1.0782376664045246e-08, "epoch": 9.916120576671036, "percentage": 99.16, "elapsed_time": "1:03:37", "remaining_time": "0:00:32", "throughput": 2657.15, "total_tokens": 10144664}
7586
+ {"current_steps": 37835, "total_steps": 38150, "loss": 0.0871, "lr": 1.044911633863721e-08, "epoch": 9.917431192660551, "percentage": 99.17, "elapsed_time": "1:03:38", "remaining_time": "0:00:31", "throughput": 2657.19, "total_tokens": 10146008}
7587
+ {"current_steps": 37840, "total_steps": 38150, "loss": 0.1192, "lr": 1.0121086283049198e-08, "epoch": 9.918741808650065, "percentage": 99.19, "elapsed_time": "1:03:38", "remaining_time": "0:00:31", "throughput": 2657.27, "total_tokens": 10147608}
7588
+ {"current_steps": 37845, "total_steps": 38150, "loss": 0.2001, "lr": 9.798286565937399e-09, "epoch": 9.92005242463958, "percentage": 99.2, "elapsed_time": "1:03:39", "remaining_time": "0:00:30", "throughput": 2657.27, "total_tokens": 10148712}
7589
+ {"current_steps": 37850, "total_steps": 38150, "loss": 0.1447, "lr": 9.48071725486721e-09, "epoch": 9.921363040629096, "percentage": 99.21, "elapsed_time": "1:03:39", "remaining_time": "0:00:30", "throughput": 2657.32, "total_tokens": 10150040}
7590
+ {"current_steps": 37855, "total_steps": 38150, "loss": 0.1506, "lr": 9.168378416299362e-09, "epoch": 9.922673656618612, "percentage": 99.23, "elapsed_time": "1:03:40", "remaining_time": "0:00:29", "throughput": 2657.44, "total_tokens": 10152024}
7591
+ {"current_steps": 37860, "total_steps": 38150, "loss": 0.2274, "lr": 8.861270115612108e-09, "epoch": 9.923984272608125, "percentage": 99.24, "elapsed_time": "1:03:40", "remaining_time": "0:00:29", "throughput": 2657.51, "total_tokens": 10153480}
7592
+ {"current_steps": 37865, "total_steps": 38150, "loss": 0.1447, "lr": 8.559392417079038e-09, "epoch": 9.92529488859764, "percentage": 99.25, "elapsed_time": "1:03:41", "remaining_time": "0:00:28", "throughput": 2657.57, "total_tokens": 10154936}
7593
+ {"current_steps": 37870, "total_steps": 38150, "loss": 0.0527, "lr": 8.262745383880166e-09, "epoch": 9.926605504587156, "percentage": 99.27, "elapsed_time": "1:03:41", "remaining_time": "0:00:28", "throughput": 2657.54, "total_tokens": 10155960}
7594
+ {"current_steps": 37875, "total_steps": 38150, "loss": 0.2633, "lr": 7.971329078110268e-09, "epoch": 9.927916120576672, "percentage": 99.28, "elapsed_time": "1:03:41", "remaining_time": "0:00:27", "throughput": 2657.56, "total_tokens": 10157160}
7595
+ {"current_steps": 37880, "total_steps": 38150, "loss": 0.1345, "lr": 7.68514356075667e-09, "epoch": 9.929226736566186, "percentage": 99.29, "elapsed_time": "1:03:42", "remaining_time": "0:00:27", "throughput": 2657.57, "total_tokens": 10158472}
7596
+ {"current_steps": 37885, "total_steps": 38150, "loss": 0.2196, "lr": 7.4041888917186775e-09, "epoch": 9.930537352555701, "percentage": 99.31, "elapsed_time": "1:03:42", "remaining_time": "0:00:26", "throughput": 2657.61, "total_tokens": 10159832}
7597
+ {"current_steps": 37890, "total_steps": 38150, "loss": 0.0851, "lr": 7.1284651297992556e-09, "epoch": 9.931847968545217, "percentage": 99.32, "elapsed_time": "1:03:43", "remaining_time": "0:00:26", "throughput": 2657.64, "total_tokens": 10161096}
7598
+ {"current_steps": 37895, "total_steps": 38150, "loss": 0.1187, "lr": 6.8579723327105715e-09, "epoch": 9.93315858453473, "percentage": 99.33, "elapsed_time": "1:03:43", "remaining_time": "0:00:25", "throughput": 2657.69, "total_tokens": 10162616}
7599
+ {"current_steps": 37900, "total_steps": 38150, "loss": 0.1574, "lr": 6.5927105570601224e-09, "epoch": 9.934469200524246, "percentage": 99.34, "elapsed_time": "1:03:44", "remaining_time": "0:00:25", "throughput": 2657.71, "total_tokens": 10163912}
7600
+ {"current_steps": 37905, "total_steps": 38150, "loss": 0.1258, "lr": 6.3326798583729364e-09, "epoch": 9.935779816513762, "percentage": 99.36, "elapsed_time": "1:03:44", "remaining_time": "0:00:24", "throughput": 2657.74, "total_tokens": 10165224}
7601
+ {"current_steps": 37910, "total_steps": 38150, "loss": 0.1768, "lr": 6.077880291069371e-09, "epoch": 9.937090432503277, "percentage": 99.37, "elapsed_time": "1:03:45", "remaining_time": "0:00:24", "throughput": 2657.82, "total_tokens": 10166904}
7602
+ {"current_steps": 37915, "total_steps": 38150, "loss": 0.1451, "lr": 5.8283119084789895e-09, "epoch": 9.938401048492791, "percentage": 99.38, "elapsed_time": "1:03:45", "remaining_time": "0:00:23", "throughput": 2657.87, "total_tokens": 10168280}
7603
+ {"current_steps": 37920, "total_steps": 38150, "loss": 0.1806, "lr": 5.583974762835009e-09, "epoch": 9.939711664482306, "percentage": 99.4, "elapsed_time": "1:03:46", "remaining_time": "0:00:23", "throughput": 2657.92, "total_tokens": 10169624}
7604
+ {"current_steps": 37925, "total_steps": 38150, "loss": 0.1136, "lr": 5.344868905279854e-09, "epoch": 9.941022280471822, "percentage": 99.41, "elapsed_time": "1:03:46", "remaining_time": "0:00:22", "throughput": 2657.92, "total_tokens": 10170840}
7605
+ {"current_steps": 37930, "total_steps": 38150, "loss": 0.1074, "lr": 5.110994385856826e-09, "epoch": 9.942332896461338, "percentage": 99.42, "elapsed_time": "1:03:47", "remaining_time": "0:00:22", "throughput": 2657.92, "total_tokens": 10172008}
7606
+ {"current_steps": 37935, "total_steps": 38150, "loss": 0.1091, "lr": 4.8823512535128845e-09, "epoch": 9.943643512450851, "percentage": 99.44, "elapsed_time": "1:03:47", "remaining_time": "0:00:21", "throughput": 2658.14, "total_tokens": 10174936}
7607
+ {"current_steps": 37940, "total_steps": 38150, "loss": 0.2523, "lr": 4.658939556104191e-09, "epoch": 9.944954128440367, "percentage": 99.45, "elapsed_time": "1:03:48", "remaining_time": "0:00:21", "throughput": 2658.17, "total_tokens": 10176232}
7608
+ {"current_steps": 37945, "total_steps": 38150, "loss": 0.194, "lr": 4.440759340393341e-09, "epoch": 9.946264744429882, "percentage": 99.46, "elapsed_time": "1:03:48", "remaining_time": "0:00:20", "throughput": 2658.19, "total_tokens": 10177416}
7609
+ {"current_steps": 37950, "total_steps": 38150, "loss": 0.1112, "lr": 4.227810652041031e-09, "epoch": 9.947575360419398, "percentage": 99.48, "elapsed_time": "1:03:49", "remaining_time": "0:00:20", "throughput": 2658.24, "total_tokens": 10178792}
7610
+ {"current_steps": 37955, "total_steps": 38150, "loss": 0.0539, "lr": 4.0200935356171645e-09, "epoch": 9.948885976408912, "percentage": 99.49, "elapsed_time": "1:03:49", "remaining_time": "0:00:19", "throughput": 2658.24, "total_tokens": 10179912}
7611
+ {"current_steps": 37960, "total_steps": 38150, "loss": 0.0891, "lr": 3.817608034600851e-09, "epoch": 9.950196592398427, "percentage": 99.5, "elapsed_time": "1:03:50", "remaining_time": "0:00:19", "throughput": 2658.26, "total_tokens": 10181160}
7612
+ {"current_steps": 37965, "total_steps": 38150, "loss": 0.2526, "lr": 3.620354191366526e-09, "epoch": 9.951507208387943, "percentage": 99.52, "elapsed_time": "1:03:50", "remaining_time": "0:00:18", "throughput": 2658.27, "total_tokens": 10182376}
7613
+ {"current_steps": 37970, "total_steps": 38150, "loss": 0.2101, "lr": 3.4283320472033864e-09, "epoch": 9.952817824377458, "percentage": 99.53, "elapsed_time": "1:03:50", "remaining_time": "0:00:18", "throughput": 2658.33, "total_tokens": 10183816}
7614
+ {"current_steps": 37975, "total_steps": 38150, "loss": 0.2558, "lr": 3.241541642298729e-09, "epoch": 9.954128440366972, "percentage": 99.54, "elapsed_time": "1:03:51", "remaining_time": "0:00:17", "throughput": 2658.34, "total_tokens": 10185016}
7615
+ {"current_steps": 37980, "total_steps": 38150, "loss": 0.1513, "lr": 3.059983015749057e-09, "epoch": 9.955439056356488, "percentage": 99.55, "elapsed_time": "1:03:51", "remaining_time": "0:00:17", "throughput": 2658.35, "total_tokens": 10186248}
7616
+ {"current_steps": 37985, "total_steps": 38150, "loss": 0.1296, "lr": 2.88365620555453e-09, "epoch": 9.956749672346003, "percentage": 99.57, "elapsed_time": "1:03:52", "remaining_time": "0:00:16", "throughput": 2658.43, "total_tokens": 10187944}
7617
+ {"current_steps": 37990, "total_steps": 38150, "loss": 0.1516, "lr": 2.712561248618961e-09, "epoch": 9.958060288335517, "percentage": 99.58, "elapsed_time": "1:03:52", "remaining_time": "0:00:16", "throughput": 2658.42, "total_tokens": 10189064}
7618
+ {"current_steps": 37995, "total_steps": 38150, "loss": 0.1585, "lr": 2.546698180749818e-09, "epoch": 9.959370904325032, "percentage": 99.59, "elapsed_time": "1:03:53", "remaining_time": "0:00:15", "throughput": 2658.5, "total_tokens": 10190616}
7619
+ {"current_steps": 38000, "total_steps": 38150, "loss": 0.1679, "lr": 2.3860670366665505e-09, "epoch": 9.960681520314548, "percentage": 99.61, "elapsed_time": "1:03:53", "remaining_time": "0:00:15", "throughput": 2658.58, "total_tokens": 10192200}
7620
+ {"current_steps": 38005, "total_steps": 38150, "loss": 0.1765, "lr": 2.2306678499867116e-09, "epoch": 9.961992136304064, "percentage": 99.62, "elapsed_time": "1:03:54", "remaining_time": "0:00:14", "throughput": 2658.69, "total_tokens": 10193928}
7621
+ {"current_steps": 38010, "total_steps": 38150, "loss": 0.151, "lr": 2.080500653234285e-09, "epoch": 9.963302752293577, "percentage": 99.63, "elapsed_time": "1:03:54", "remaining_time": "0:00:14", "throughput": 2658.73, "total_tokens": 10195240}
7622
+ {"current_steps": 38015, "total_steps": 38150, "loss": 0.164, "lr": 1.935565477839685e-09, "epoch": 9.964613368283093, "percentage": 99.65, "elapsed_time": "1:03:55", "remaining_time": "0:00:13", "throughput": 2658.85, "total_tokens": 10197112}
7623
+ {"current_steps": 38020, "total_steps": 38150, "loss": 0.1545, "lr": 1.7958623541397546e-09, "epoch": 9.965923984272608, "percentage": 99.66, "elapsed_time": "1:03:55", "remaining_time": "0:00:13", "throughput": 2658.89, "total_tokens": 10198408}
7624
+ {"current_steps": 38025, "total_steps": 38150, "loss": 0.1132, "lr": 1.6613913113694424e-09, "epoch": 9.967234600262124, "percentage": 99.67, "elapsed_time": "1:03:56", "remaining_time": "0:00:12", "throughput": 2658.93, "total_tokens": 10199784}
7625
+ {"current_steps": 38030, "total_steps": 38150, "loss": 0.0771, "lr": 1.532152377678453e-09, "epoch": 9.968545216251638, "percentage": 99.69, "elapsed_time": "1:03:56", "remaining_time": "0:00:12", "throughput": 2658.91, "total_tokens": 10200824}
7626
+ {"current_steps": 38035, "total_steps": 38150, "loss": 0.1052, "lr": 1.4081455801145948e-09, "epoch": 9.969855832241153, "percentage": 99.7, "elapsed_time": "1:03:56", "remaining_time": "0:00:11", "throughput": 2658.9, "total_tokens": 10201912}
7627
+ {"current_steps": 38040, "total_steps": 38150, "loss": 0.0675, "lr": 1.289370944629331e-09, "epoch": 9.971166448230669, "percentage": 99.71, "elapsed_time": "1:03:57", "remaining_time": "0:00:11", "throughput": 2658.86, "total_tokens": 10202856}
7628
+ {"current_steps": 38045, "total_steps": 38150, "loss": 0.1379, "lr": 1.1758284960861066e-09, "epoch": 9.972477064220184, "percentage": 99.72, "elapsed_time": "1:03:57", "remaining_time": "0:00:10", "throughput": 2658.84, "total_tokens": 10203896}
7629
+ {"current_steps": 38050, "total_steps": 38150, "loss": 0.1102, "lr": 1.0675182582464693e-09, "epoch": 9.973787680209698, "percentage": 99.74, "elapsed_time": "1:03:58", "remaining_time": "0:00:10", "throughput": 2658.89, "total_tokens": 10205272}
7630
+ {"current_steps": 38055, "total_steps": 38150, "loss": 0.2056, "lr": 9.644402537811736e-10, "epoch": 9.975098296199214, "percentage": 99.75, "elapsed_time": "1:03:58", "remaining_time": "0:00:09", "throughput": 2658.99, "total_tokens": 10206936}
7631
+ {"current_steps": 38060, "total_steps": 38150, "loss": 0.0884, "lr": 8.665945042618529e-10, "epoch": 9.97640891218873, "percentage": 99.76, "elapsed_time": "1:03:59", "remaining_time": "0:00:09", "throughput": 2658.96, "total_tokens": 10207912}
7632
+ {"current_steps": 38065, "total_steps": 38150, "loss": 0.1877, "lr": 7.739810301693462e-10, "epoch": 9.977719528178245, "percentage": 99.78, "elapsed_time": "1:03:59", "remaining_time": "0:00:08", "throughput": 2658.92, "total_tokens": 10208904}
7633
+ {"current_steps": 38070, "total_steps": 38150, "loss": 0.2342, "lr": 6.865998508881477e-10, "epoch": 9.979030144167758, "percentage": 99.79, "elapsed_time": "1:03:59", "remaining_time": "0:00:08", "throughput": 2658.9, "total_tokens": 10209928}
7634
+ {"current_steps": 38075, "total_steps": 38150, "loss": 0.0629, "lr": 6.044509847064062e-10, "epoch": 9.980340760157274, "percentage": 99.8, "elapsed_time": "1:04:00", "remaining_time": "0:00:07", "throughput": 2658.96, "total_tokens": 10211368}
7635
+ {"current_steps": 38080, "total_steps": 38150, "loss": 0.1347, "lr": 5.275344488187006e-10, "epoch": 9.98165137614679, "percentage": 99.82, "elapsed_time": "1:04:00", "remaining_time": "0:00:07", "throughput": 2658.97, "total_tokens": 10212600}
7636
+ {"current_steps": 38085, "total_steps": 38150, "loss": 0.1151, "lr": 4.5585025932048943e-10, "epoch": 9.982961992136303, "percentage": 99.83, "elapsed_time": "1:04:01", "remaining_time": "0:00:06", "throughput": 2658.95, "total_tokens": 10213656}
7637
+ {"current_steps": 38090, "total_steps": 38150, "loss": 0.2345, "lr": 3.893984312164367e-10, "epoch": 9.984272608125819, "percentage": 99.84, "elapsed_time": "1:04:01", "remaining_time": "0:00:06", "throughput": 2659.04, "total_tokens": 10215272}
7638
+ {"current_steps": 38095, "total_steps": 38150, "loss": 0.1064, "lr": 3.28178978417637e-10, "epoch": 9.985583224115334, "percentage": 99.86, "elapsed_time": "1:04:02", "remaining_time": "0:00:05", "throughput": 2659.1, "total_tokens": 10216776}
7639
+ {"current_steps": 38100, "total_steps": 38150, "loss": 0.0903, "lr": 2.7219191373328846e-10, "epoch": 9.98689384010485, "percentage": 99.87, "elapsed_time": "1:04:02", "remaining_time": "0:00:05", "throughput": 2659.1, "total_tokens": 10217960}
7640
+ {"current_steps": 38105, "total_steps": 38150, "loss": 0.0594, "lr": 2.2143724888179506e-10, "epoch": 9.988204456094364, "percentage": 99.88, "elapsed_time": "1:04:03", "remaining_time": "0:00:04", "throughput": 2659.12, "total_tokens": 10219192}
7641
+ {"current_steps": 38110, "total_steps": 38150, "loss": 0.2746, "lr": 1.759149944879912e-10, "epoch": 9.98951507208388, "percentage": 99.9, "elapsed_time": "1:04:03", "remaining_time": "0:00:04", "throughput": 2659.22, "total_tokens": 10220904}
7642
+ {"current_steps": 38115, "total_steps": 38150, "loss": 0.2931, "lr": 1.3562516008036597e-10, "epoch": 9.990825688073395, "percentage": 99.91, "elapsed_time": "1:04:04", "remaining_time": "0:00:03", "throughput": 2659.25, "total_tokens": 10222152}
7643
+ {"current_steps": 38120, "total_steps": 38150, "loss": 0.1849, "lr": 1.0056775408828767e-10, "epoch": 9.99213630406291, "percentage": 99.92, "elapsed_time": "1:04:04", "remaining_time": "0:00:03", "throughput": 2659.31, "total_tokens": 10223672}
7644
+ {"current_steps": 38125, "total_steps": 38150, "loss": 0.2286, "lr": 7.07427838503305e-11, "epoch": 9.993446920052424, "percentage": 99.93, "elapsed_time": "1:04:05", "remaining_time": "0:00:02", "throughput": 2659.47, "total_tokens": 10225736}
7645
+ {"current_steps": 38130, "total_steps": 38150, "loss": 0.1213, "lr": 4.6150255611498996e-11, "epoch": 9.99475753604194, "percentage": 99.95, "elapsed_time": "1:04:05", "remaining_time": "0:00:02", "throughput": 2659.56, "total_tokens": 10227320}
7646
+ {"current_steps": 38135, "total_steps": 38150, "loss": 0.1191, "lr": 2.679017451490129e-11, "epoch": 9.996068152031455, "percentage": 99.96, "elapsed_time": "1:04:05", "remaining_time": "0:00:01", "throughput": 2659.57, "total_tokens": 10228504}
7647
+ {"current_steps": 38140, "total_steps": 38150, "loss": 0.2585, "lr": 1.2662544615626993e-11, "epoch": 9.997378768020969, "percentage": 99.97, "elapsed_time": "1:04:06", "remaining_time": "0:00:01", "throughput": 2659.57, "total_tokens": 10229624}
7648
+ {"current_steps": 38145, "total_steps": 38150, "loss": 0.2906, "lr": 3.767368869644905e-12, "epoch": 9.998689384010484, "percentage": 99.99, "elapsed_time": "1:04:06", "remaining_time": "0:00:00", "throughput": 2659.62, "total_tokens": 10231048}
7649
+ {"current_steps": 38150, "total_steps": 38150, "loss": 0.2814, "lr": 1.0464913657859399e-13, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:04:07", "remaining_time": "0:00:00", "throughput": 2659.55, "total_tokens": 10232192}
7650
+ {"current_steps": 38150, "total_steps": 38150, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:04:09", "remaining_time": "0:00:00", "throughput": 2658.36, "total_tokens": 10232192}