rbelanec commited on
Commit
fbc09a2
verified
1 Parent(s): 34f95da

Training in progress, step 38150

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +375 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b9bcb811e3bf6de432235111024192ae250c7e08b865f831efba6026dd8c0d8
3
  size 8388736
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ee03e5b5e846dbea061390e18a0a97e8a479400a8ba7aa636edbc2a0cc13e33
3
  size 8388736
trainer_log.jsonl CHANGED
@@ -7273,3 +7273,378 @@
7273
  {"current_steps": 36270, "total_steps": 38150, "loss": 0.152, "lr": 3.693524234043594e-07, "epoch": 9.507208387942333, "percentage": 95.07, "elapsed_time": "1:00:11", "remaining_time": "0:03:07", "throughput": 2692.02, "total_tokens": 9723480}
7274
  {"current_steps": 36275, "total_steps": 38150, "loss": 0.1421, "lr": 3.6739625437180457e-07, "epoch": 9.508519003931848, "percentage": 95.09, "elapsed_time": "1:00:12", "remaining_time": "0:03:06", "throughput": 2692.13, "total_tokens": 9725224}
7275
  {"current_steps": 36280, "total_steps": 38150, "loss": 0.3595, "lr": 3.654452409006498e-07, "epoch": 9.509829619921362, "percentage": 95.1, "elapsed_time": "1:00:12", "remaining_time": "0:03:06", "throughput": 2692.16, "total_tokens": 9726520}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7273
  {"current_steps": 36270, "total_steps": 38150, "loss": 0.152, "lr": 3.693524234043594e-07, "epoch": 9.507208387942333, "percentage": 95.07, "elapsed_time": "1:00:11", "remaining_time": "0:03:07", "throughput": 2692.02, "total_tokens": 9723480}
7274
  {"current_steps": 36275, "total_steps": 38150, "loss": 0.1421, "lr": 3.6739625437180457e-07, "epoch": 9.508519003931848, "percentage": 95.09, "elapsed_time": "1:00:12", "remaining_time": "0:03:06", "throughput": 2692.13, "total_tokens": 9725224}
7275
  {"current_steps": 36280, "total_steps": 38150, "loss": 0.3595, "lr": 3.654452409006498e-07, "epoch": 9.509829619921362, "percentage": 95.1, "elapsed_time": "1:00:12", "remaining_time": "0:03:06", "throughput": 2692.16, "total_tokens": 9726520}
7276
+ {"current_steps": 36285, "total_steps": 38150, "loss": 0.0853, "lr": 3.634993833992434e-07, "epoch": 9.511140235910878, "percentage": 95.11, "elapsed_time": "1:00:13", "remaining_time": "0:03:05", "throughput": 2692.07, "total_tokens": 9727352}
7277
+ {"current_steps": 36290, "total_steps": 38150, "loss": 0.1123, "lr": 3.615586822748457e-07, "epoch": 9.512450851900393, "percentage": 95.12, "elapsed_time": "1:00:13", "remaining_time": "0:03:05", "throughput": 2692.13, "total_tokens": 9728760}
7278
+ {"current_steps": 36295, "total_steps": 38150, "loss": 0.0724, "lr": 3.596231379336429e-07, "epoch": 9.513761467889909, "percentage": 95.14, "elapsed_time": "1:00:14", "remaining_time": "0:03:04", "throughput": 2692.23, "total_tokens": 9730360}
7279
+ {"current_steps": 36300, "total_steps": 38150, "loss": 0.1198, "lr": 3.5769275078074425e-07, "epoch": 9.515072083879424, "percentage": 95.15, "elapsed_time": "1:00:14", "remaining_time": "0:03:04", "throughput": 2692.25, "total_tokens": 9731688}
7280
+ {"current_steps": 36305, "total_steps": 38150, "loss": 0.1929, "lr": 3.5576752122017385e-07, "epoch": 9.516382699868938, "percentage": 95.16, "elapsed_time": "1:00:15", "remaining_time": "0:03:03", "throughput": 2692.31, "total_tokens": 9733160}
7281
+ {"current_steps": 36310, "total_steps": 38150, "loss": 0.093, "lr": 3.5384744965487883e-07, "epoch": 9.517693315858454, "percentage": 95.18, "elapsed_time": "1:00:15", "remaining_time": "0:03:03", "throughput": 2692.38, "total_tokens": 9734760}
7282
+ {"current_steps": 36315, "total_steps": 38150, "loss": 0.0743, "lr": 3.519325364867265e-07, "epoch": 9.51900393184797, "percentage": 95.19, "elapsed_time": "1:00:16", "remaining_time": "0:03:02", "throughput": 2692.4, "total_tokens": 9735960}
7283
+ {"current_steps": 36320, "total_steps": 38150, "loss": 0.1604, "lr": 3.5002278211650476e-07, "epoch": 9.520314547837483, "percentage": 95.2, "elapsed_time": "1:00:16", "remaining_time": "0:03:02", "throughput": 2692.46, "total_tokens": 9737416}
7284
+ {"current_steps": 36325, "total_steps": 38150, "loss": 0.1655, "lr": 3.4811818694392706e-07, "epoch": 9.521625163826998, "percentage": 95.22, "elapsed_time": "1:00:17", "remaining_time": "0:03:01", "throughput": 2692.52, "total_tokens": 9738888}
7285
+ {"current_steps": 36330, "total_steps": 38150, "loss": 0.0637, "lr": 3.462187513676135e-07, "epoch": 9.522935779816514, "percentage": 95.23, "elapsed_time": "1:00:17", "remaining_time": "0:03:01", "throughput": 2692.49, "total_tokens": 9739880}
7286
+ {"current_steps": 36335, "total_steps": 38150, "loss": 0.1003, "lr": 3.4432447578511826e-07, "epoch": 9.52424639580603, "percentage": 95.24, "elapsed_time": "1:00:17", "remaining_time": "0:03:00", "throughput": 2692.55, "total_tokens": 9741336}
7287
+ {"current_steps": 36340, "total_steps": 38150, "loss": 0.184, "lr": 3.4243536059291035e-07, "epoch": 9.525557011795543, "percentage": 95.26, "elapsed_time": "1:00:18", "remaining_time": "0:03:00", "throughput": 2692.61, "total_tokens": 9742776}
7288
+ {"current_steps": 36345, "total_steps": 38150, "loss": 0.0883, "lr": 3.405514061863735e-07, "epoch": 9.526867627785059, "percentage": 95.27, "elapsed_time": "1:00:18", "remaining_time": "0:02:59", "throughput": 2692.65, "total_tokens": 9744072}
7289
+ {"current_steps": 36350, "total_steps": 38150, "loss": 0.2157, "lr": 3.3867261295982e-07, "epoch": 9.528178243774574, "percentage": 95.28, "elapsed_time": "1:00:19", "remaining_time": "0:02:59", "throughput": 2692.69, "total_tokens": 9745384}
7290
+ {"current_steps": 36355, "total_steps": 38150, "loss": 0.2285, "lr": 3.36798981306477e-07, "epoch": 9.52948885976409, "percentage": 95.29, "elapsed_time": "1:00:19", "remaining_time": "0:02:58", "throughput": 2692.85, "total_tokens": 9747400}
7291
+ {"current_steps": 36360, "total_steps": 38150, "loss": 0.1603, "lr": 3.3493051161849477e-07, "epoch": 9.530799475753604, "percentage": 95.31, "elapsed_time": "1:00:20", "remaining_time": "0:02:58", "throughput": 2692.93, "total_tokens": 9749032}
7292
+ {"current_steps": 36365, "total_steps": 38150, "loss": 0.1984, "lr": 3.330672042869354e-07, "epoch": 9.53211009174312, "percentage": 95.32, "elapsed_time": "1:00:20", "remaining_time": "0:02:57", "throughput": 2693.01, "total_tokens": 9750648}
7293
+ {"current_steps": 36370, "total_steps": 38150, "loss": 0.0962, "lr": 3.312090597017925e-07, "epoch": 9.533420707732635, "percentage": 95.33, "elapsed_time": "1:00:21", "remaining_time": "0:02:57", "throughput": 2693.07, "total_tokens": 9752392}
7294
+ {"current_steps": 36375, "total_steps": 38150, "loss": 0.12, "lr": 3.2935607825196614e-07, "epoch": 9.534731323722148, "percentage": 95.35, "elapsed_time": "1:00:21", "remaining_time": "0:02:56", "throughput": 2693.18, "total_tokens": 9754152}
7295
+ {"current_steps": 36380, "total_steps": 38150, "loss": 0.0695, "lr": 3.275082603252877e-07, "epoch": 9.536041939711664, "percentage": 95.36, "elapsed_time": "1:00:22", "remaining_time": "0:02:56", "throughput": 2693.14, "total_tokens": 9755128}
7296
+ {"current_steps": 36385, "total_steps": 38150, "loss": 0.0703, "lr": 3.2566560630849784e-07, "epoch": 9.53735255570118, "percentage": 95.37, "elapsed_time": "1:00:22", "remaining_time": "0:02:55", "throughput": 2693.17, "total_tokens": 9756408}
7297
+ {"current_steps": 36390, "total_steps": 38150, "loss": 0.1618, "lr": 3.2382811658726033e-07, "epoch": 9.538663171690695, "percentage": 95.39, "elapsed_time": "1:00:23", "remaining_time": "0:02:55", "throughput": 2693.14, "total_tokens": 9757432}
7298
+ {"current_steps": 36395, "total_steps": 38150, "loss": 0.061, "lr": 3.219957915461619e-07, "epoch": 9.539973787680209, "percentage": 95.4, "elapsed_time": "1:00:23", "remaining_time": "0:02:54", "throughput": 2693.2, "total_tokens": 9758856}
7299
+ {"current_steps": 36400, "total_steps": 38150, "loss": 0.13, "lr": 3.201686315687041e-07, "epoch": 9.541284403669724, "percentage": 95.41, "elapsed_time": "1:00:24", "remaining_time": "0:02:54", "throughput": 2693.39, "total_tokens": 9761496}
7300
+ {"current_steps": 36405, "total_steps": 38150, "loss": 0.1309, "lr": 3.183466370373117e-07, "epoch": 9.54259501965924, "percentage": 95.43, "elapsed_time": "1:00:24", "remaining_time": "0:02:53", "throughput": 2693.38, "total_tokens": 9762616}
7301
+ {"current_steps": 36410, "total_steps": 38150, "loss": 0.1386, "lr": 3.1652980833331846e-07, "epoch": 9.543905635648755, "percentage": 95.44, "elapsed_time": "1:00:25", "remaining_time": "0:02:53", "throughput": 2693.4, "total_tokens": 9763864}
7302
+ {"current_steps": 36415, "total_steps": 38150, "loss": 0.1269, "lr": 3.147181458369897e-07, "epoch": 9.54521625163827, "percentage": 95.45, "elapsed_time": "1:00:25", "remaining_time": "0:02:52", "throughput": 2693.4, "total_tokens": 9764968}
7303
+ {"current_steps": 36420, "total_steps": 38150, "loss": 0.169, "lr": 3.12911649927497e-07, "epoch": 9.546526867627785, "percentage": 95.47, "elapsed_time": "1:00:25", "remaining_time": "0:02:52", "throughput": 2693.46, "total_tokens": 9766424}
7304
+ {"current_steps": 36425, "total_steps": 38150, "loss": 0.0868, "lr": 3.111103209829408e-07, "epoch": 9.5478374836173, "percentage": 95.48, "elapsed_time": "1:00:26", "remaining_time": "0:02:51", "throughput": 2693.53, "total_tokens": 9767912}
7305
+ {"current_steps": 36430, "total_steps": 38150, "loss": 0.0841, "lr": 3.0931415938033336e-07, "epoch": 9.549148099606816, "percentage": 95.49, "elapsed_time": "1:00:26", "remaining_time": "0:02:51", "throughput": 2693.49, "total_tokens": 9768904}
7306
+ {"current_steps": 36435, "total_steps": 38150, "loss": 0.1449, "lr": 3.075231654956129e-07, "epoch": 9.55045871559633, "percentage": 95.5, "elapsed_time": "1:00:27", "remaining_time": "0:02:50", "throughput": 2693.58, "total_tokens": 9770552}
7307
+ {"current_steps": 36440, "total_steps": 38150, "loss": 0.2064, "lr": 3.0573733970362674e-07, "epoch": 9.551769331585845, "percentage": 95.52, "elapsed_time": "1:00:27", "remaining_time": "0:02:50", "throughput": 2693.59, "total_tokens": 9771704}
7308
+ {"current_steps": 36445, "total_steps": 38150, "loss": 0.1631, "lr": 3.0395668237814813e-07, "epoch": 9.55307994757536, "percentage": 95.53, "elapsed_time": "1:00:28", "remaining_time": "0:02:49", "throughput": 2693.69, "total_tokens": 9773320}
7309
+ {"current_steps": 36450, "total_steps": 38150, "loss": 0.1425, "lr": 3.0218119389186503e-07, "epoch": 9.554390563564876, "percentage": 95.54, "elapsed_time": "1:00:28", "remaining_time": "0:02:49", "throughput": 2693.68, "total_tokens": 9774424}
7310
+ {"current_steps": 36455, "total_steps": 38150, "loss": 0.0587, "lr": 3.0041087461638304e-07, "epoch": 9.55570117955439, "percentage": 95.56, "elapsed_time": "1:00:29", "remaining_time": "0:02:48", "throughput": 2693.63, "total_tokens": 9775336}
7311
+ {"current_steps": 36460, "total_steps": 38150, "loss": 0.1666, "lr": 2.986457249222252e-07, "epoch": 9.557011795543906, "percentage": 95.57, "elapsed_time": "1:00:29", "remaining_time": "0:02:48", "throughput": 2693.66, "total_tokens": 9776680}
7312
+ {"current_steps": 36465, "total_steps": 38150, "loss": 0.1577, "lr": 2.968857451788404e-07, "epoch": 9.558322411533421, "percentage": 95.58, "elapsed_time": "1:00:29", "remaining_time": "0:02:47", "throughput": 2693.72, "total_tokens": 9778088}
7313
+ {"current_steps": 36470, "total_steps": 38150, "loss": 0.1041, "lr": 2.951309357545812e-07, "epoch": 9.559633027522935, "percentage": 95.6, "elapsed_time": "1:00:30", "remaining_time": "0:02:47", "throughput": 2693.83, "total_tokens": 9779752}
7314
+ {"current_steps": 36475, "total_steps": 38150, "loss": 0.0939, "lr": 2.9338129701673434e-07, "epoch": 9.56094364351245, "percentage": 95.61, "elapsed_time": "1:00:30", "remaining_time": "0:02:46", "throughput": 2693.9, "total_tokens": 9781368}
7315
+ {"current_steps": 36480, "total_steps": 38150, "loss": 0.0557, "lr": 2.916368293314903e-07, "epoch": 9.562254259501966, "percentage": 95.62, "elapsed_time": "1:00:31", "remaining_time": "0:02:46", "throughput": 2693.92, "total_tokens": 9782600}
7316
+ {"current_steps": 36485, "total_steps": 38150, "loss": 0.155, "lr": 2.898975330639653e-07, "epoch": 9.563564875491481, "percentage": 95.64, "elapsed_time": "1:00:31", "remaining_time": "0:02:45", "throughput": 2693.99, "total_tokens": 9784184}
7317
+ {"current_steps": 36490, "total_steps": 38150, "loss": 0.0989, "lr": 2.8816340857819034e-07, "epoch": 9.564875491480995, "percentage": 95.65, "elapsed_time": "1:00:32", "remaining_time": "0:02:45", "throughput": 2694.05, "total_tokens": 9785672}
7318
+ {"current_steps": 36495, "total_steps": 38150, "loss": 0.1609, "lr": 2.8643445623711684e-07, "epoch": 9.56618610747051, "percentage": 95.66, "elapsed_time": "1:00:32", "remaining_time": "0:02:44", "throughput": 2694.08, "total_tokens": 9786952}
7319
+ {"current_steps": 36500, "total_steps": 38150, "loss": 0.0849, "lr": 2.847106764026081e-07, "epoch": 9.567496723460026, "percentage": 95.67, "elapsed_time": "1:00:33", "remaining_time": "0:02:44", "throughput": 2694.07, "total_tokens": 9788040}
7320
+ {"current_steps": 36505, "total_steps": 38150, "loss": 0.1354, "lr": 2.8299206943545044e-07, "epoch": 9.568807339449542, "percentage": 95.69, "elapsed_time": "1:00:33", "remaining_time": "0:02:43", "throughput": 2694.08, "total_tokens": 9789144}
7321
+ {"current_steps": 36510, "total_steps": 38150, "loss": 0.0999, "lr": 2.812786356953451e-07, "epoch": 9.570117955439056, "percentage": 95.7, "elapsed_time": "1:00:33", "remaining_time": "0:02:43", "throughput": 2694.06, "total_tokens": 9790168}
7322
+ {"current_steps": 36515, "total_steps": 38150, "loss": 0.159, "lr": 2.795703755409107e-07, "epoch": 9.571428571428571, "percentage": 95.71, "elapsed_time": "1:00:34", "remaining_time": "0:02:42", "throughput": 2694.11, "total_tokens": 9791672}
7323
+ {"current_steps": 36520, "total_steps": 38150, "loss": 0.1438, "lr": 2.7786728932968354e-07, "epoch": 9.572739187418087, "percentage": 95.73, "elapsed_time": "1:00:34", "remaining_time": "0:02:42", "throughput": 2694.16, "total_tokens": 9793048}
7324
+ {"current_steps": 36525, "total_steps": 38150, "loss": 0.1704, "lr": 2.761693774181173e-07, "epoch": 9.574049803407602, "percentage": 95.74, "elapsed_time": "1:00:35", "remaining_time": "0:02:41", "throughput": 2694.24, "total_tokens": 9794648}
7325
+ {"current_steps": 36530, "total_steps": 38150, "loss": 0.0708, "lr": 2.7447664016157783e-07, "epoch": 9.575360419397116, "percentage": 95.75, "elapsed_time": "1:00:35", "remaining_time": "0:02:41", "throughput": 2694.28, "total_tokens": 9795928}
7326
+ {"current_steps": 36535, "total_steps": 38150, "loss": 0.1685, "lr": 2.727890779143566e-07, "epoch": 9.576671035386632, "percentage": 95.77, "elapsed_time": "1:00:36", "remaining_time": "0:02:40", "throughput": 2694.29, "total_tokens": 9797064}
7327
+ {"current_steps": 36540, "total_steps": 38150, "loss": 0.0851, "lr": 2.7110669102965723e-07, "epoch": 9.577981651376147, "percentage": 95.78, "elapsed_time": "1:00:36", "remaining_time": "0:02:40", "throughput": 2694.27, "total_tokens": 9798120}
7328
+ {"current_steps": 36545, "total_steps": 38150, "loss": 0.1578, "lr": 2.694294798595981e-07, "epoch": 9.579292267365663, "percentage": 95.79, "elapsed_time": "1:00:37", "remaining_time": "0:02:39", "throughput": 2694.27, "total_tokens": 9799336}
7329
+ {"current_steps": 36550, "total_steps": 38150, "loss": 0.124, "lr": 2.677574447552178e-07, "epoch": 9.580602883355176, "percentage": 95.81, "elapsed_time": "1:00:37", "remaining_time": "0:02:39", "throughput": 2694.3, "total_tokens": 9800664}
7330
+ {"current_steps": 36555, "total_steps": 38150, "loss": 0.0636, "lr": 2.660905860664697e-07, "epoch": 9.581913499344692, "percentage": 95.82, "elapsed_time": "1:00:37", "remaining_time": "0:02:38", "throughput": 2694.27, "total_tokens": 9801656}
7331
+ {"current_steps": 36560, "total_steps": 38150, "loss": 0.1255, "lr": 2.6442890414222487e-07, "epoch": 9.583224115334207, "percentage": 95.83, "elapsed_time": "1:00:38", "remaining_time": "0:02:38", "throughput": 2694.37, "total_tokens": 9803416}
7332
+ {"current_steps": 36565, "total_steps": 38150, "loss": 0.1381, "lr": 2.6277239933027163e-07, "epoch": 9.584534731323721, "percentage": 95.85, "elapsed_time": "1:00:38", "remaining_time": "0:02:37", "throughput": 2694.38, "total_tokens": 9804568}
7333
+ {"current_steps": 36570, "total_steps": 38150, "loss": 0.0579, "lr": 2.611210719773133e-07, "epoch": 9.585845347313237, "percentage": 95.86, "elapsed_time": "1:00:39", "remaining_time": "0:02:37", "throughput": 2694.34, "total_tokens": 9805528}
7334
+ {"current_steps": 36575, "total_steps": 38150, "loss": 0.0623, "lr": 2.5947492242896776e-07, "epoch": 9.587155963302752, "percentage": 95.87, "elapsed_time": "1:00:39", "remaining_time": "0:02:36", "throughput": 2694.33, "total_tokens": 9806616}
7335
+ {"current_steps": 36580, "total_steps": 38150, "loss": 0.1509, "lr": 2.5783395102977057e-07, "epoch": 9.588466579292268, "percentage": 95.88, "elapsed_time": "1:00:40", "remaining_time": "0:02:36", "throughput": 2694.37, "total_tokens": 9807944}
7336
+ {"current_steps": 36585, "total_steps": 38150, "loss": 0.1805, "lr": 2.5619815812318037e-07, "epoch": 9.589777195281782, "percentage": 95.9, "elapsed_time": "1:00:40", "remaining_time": "0:02:35", "throughput": 2694.42, "total_tokens": 9809272}
7337
+ {"current_steps": 36590, "total_steps": 38150, "loss": 0.1697, "lr": 2.5456754405155934e-07, "epoch": 9.591087811271297, "percentage": 95.91, "elapsed_time": "1:00:41", "remaining_time": "0:02:35", "throughput": 2694.52, "total_tokens": 9811080}
7338
+ {"current_steps": 36595, "total_steps": 38150, "loss": 0.1012, "lr": 2.5294210915619564e-07, "epoch": 9.592398427260813, "percentage": 95.92, "elapsed_time": "1:00:41", "remaining_time": "0:02:34", "throughput": 2694.47, "total_tokens": 9811960}
7339
+ {"current_steps": 36600, "total_steps": 38150, "loss": 0.0926, "lr": 2.5132185377728656e-07, "epoch": 9.593709043250328, "percentage": 95.94, "elapsed_time": "1:00:41", "remaining_time": "0:02:34", "throughput": 2694.46, "total_tokens": 9813048}
7340
+ {"current_steps": 36605, "total_steps": 38150, "loss": 0.1015, "lr": 2.497067782539553e-07, "epoch": 9.595019659239842, "percentage": 95.95, "elapsed_time": "1:00:42", "remaining_time": "0:02:33", "throughput": 2694.45, "total_tokens": 9814168}
7341
+ {"current_steps": 36610, "total_steps": 38150, "loss": 0.0663, "lr": 2.480968829242286e-07, "epoch": 9.596330275229358, "percentage": 95.96, "elapsed_time": "1:00:42", "remaining_time": "0:02:33", "throughput": 2694.46, "total_tokens": 9815416}
7342
+ {"current_steps": 36615, "total_steps": 38150, "loss": 0.1387, "lr": 2.4649216812505373e-07, "epoch": 9.597640891218873, "percentage": 95.98, "elapsed_time": "1:00:43", "remaining_time": "0:02:32", "throughput": 2694.48, "total_tokens": 9816632}
7343
+ {"current_steps": 36620, "total_steps": 38150, "loss": 0.0636, "lr": 2.448926341923008e-07, "epoch": 9.598951507208389, "percentage": 95.99, "elapsed_time": "1:00:43", "remaining_time": "0:02:32", "throughput": 2694.47, "total_tokens": 9817736}
7344
+ {"current_steps": 36625, "total_steps": 38150, "loss": 0.1344, "lr": 2.4329828146074095e-07, "epoch": 9.600262123197902, "percentage": 96.0, "elapsed_time": "1:00:44", "remaining_time": "0:02:31", "throughput": 2694.56, "total_tokens": 9819432}
7345
+ {"current_steps": 36630, "total_steps": 38150, "loss": 0.0971, "lr": 2.417091102640795e-07, "epoch": 9.601572739187418, "percentage": 96.02, "elapsed_time": "1:00:44", "remaining_time": "0:02:31", "throughput": 2694.65, "total_tokens": 9821096}
7346
+ {"current_steps": 36635, "total_steps": 38150, "loss": 0.2046, "lr": 2.401251209349198e-07, "epoch": 9.602883355176933, "percentage": 96.03, "elapsed_time": "1:00:45", "remaining_time": "0:02:30", "throughput": 2694.71, "total_tokens": 9822584}
7347
+ {"current_steps": 36640, "total_steps": 38150, "loss": 0.1284, "lr": 2.385463138047911e-07, "epoch": 9.604193971166449, "percentage": 96.04, "elapsed_time": "1:00:45", "remaining_time": "0:02:30", "throughput": 2694.71, "total_tokens": 9823720}
7348
+ {"current_steps": 36645, "total_steps": 38150, "loss": 0.1133, "lr": 2.3697268920413462e-07, "epoch": 9.605504587155963, "percentage": 96.06, "elapsed_time": "1:00:45", "remaining_time": "0:02:29", "throughput": 2694.75, "total_tokens": 9824984}
7349
+ {"current_steps": 36650, "total_steps": 38150, "loss": 0.2869, "lr": 2.3540424746230916e-07, "epoch": 9.606815203145478, "percentage": 96.07, "elapsed_time": "1:00:46", "remaining_time": "0:02:29", "throughput": 2694.76, "total_tokens": 9826152}
7350
+ {"current_steps": 36655, "total_steps": 38150, "loss": 0.0779, "lr": 2.3384098890758267e-07, "epoch": 9.608125819134994, "percentage": 96.08, "elapsed_time": "1:00:46", "remaining_time": "0:02:28", "throughput": 2694.75, "total_tokens": 9827224}
7351
+ {"current_steps": 36660, "total_steps": 38150, "loss": 0.0766, "lr": 2.322829138671434e-07, "epoch": 9.609436435124508, "percentage": 96.09, "elapsed_time": "1:00:47", "remaining_time": "0:02:28", "throughput": 2694.78, "total_tokens": 9828472}
7352
+ {"current_steps": 36665, "total_steps": 38150, "loss": 0.1068, "lr": 2.3073002266709443e-07, "epoch": 9.610747051114023, "percentage": 96.11, "elapsed_time": "1:00:47", "remaining_time": "0:02:27", "throughput": 2694.78, "total_tokens": 9829704}
7353
+ {"current_steps": 36670, "total_steps": 38150, "loss": 0.0725, "lr": 2.2918231563245629e-07, "epoch": 9.612057667103539, "percentage": 96.12, "elapsed_time": "1:00:48", "remaining_time": "0:02:27", "throughput": 2694.84, "total_tokens": 9831224}
7354
+ {"current_steps": 36675, "total_steps": 38150, "loss": 0.1218, "lr": 2.2763979308715877e-07, "epoch": 9.613368283093054, "percentage": 96.13, "elapsed_time": "1:00:48", "remaining_time": "0:02:26", "throughput": 2694.91, "total_tokens": 9832744}
7355
+ {"current_steps": 36680, "total_steps": 38150, "loss": 0.1073, "lr": 2.2610245535404638e-07, "epoch": 9.614678899082568, "percentage": 96.15, "elapsed_time": "1:00:49", "remaining_time": "0:02:26", "throughput": 2694.91, "total_tokens": 9833960}
7356
+ {"current_steps": 36685, "total_steps": 38150, "loss": 0.1357, "lr": 2.2457030275488676e-07, "epoch": 9.615989515072084, "percentage": 96.16, "elapsed_time": "1:00:49", "remaining_time": "0:02:25", "throughput": 2695.05, "total_tokens": 9835960}
7357
+ {"current_steps": 36690, "total_steps": 38150, "loss": 0.1159, "lr": 2.2304333561035396e-07, "epoch": 9.617300131061599, "percentage": 96.17, "elapsed_time": "1:00:50", "remaining_time": "0:02:25", "throughput": 2695.01, "total_tokens": 9836936}
7358
+ {"current_steps": 36695, "total_steps": 38150, "loss": 0.0842, "lr": 2.2152155424003952e-07, "epoch": 9.618610747051115, "percentage": 96.19, "elapsed_time": "1:00:50", "remaining_time": "0:02:24", "throughput": 2695.06, "total_tokens": 9838296}
7359
+ {"current_steps": 36700, "total_steps": 38150, "loss": 0.2818, "lr": 2.2000495896244976e-07, "epoch": 9.619921363040628, "percentage": 96.2, "elapsed_time": "1:00:50", "remaining_time": "0:02:24", "throughput": 2695.16, "total_tokens": 9839912}
7360
+ {"current_steps": 36705, "total_steps": 38150, "loss": 0.1131, "lr": 2.1849355009500582e-07, "epoch": 9.621231979030144, "percentage": 96.21, "elapsed_time": "1:00:51", "remaining_time": "0:02:23", "throughput": 2695.18, "total_tokens": 9841096}
7361
+ {"current_steps": 36710, "total_steps": 38150, "loss": 0.0888, "lr": 2.1698732795404074e-07, "epoch": 9.62254259501966, "percentage": 96.23, "elapsed_time": "1:00:51", "remaining_time": "0:02:23", "throughput": 2695.23, "total_tokens": 9842600}
7362
+ {"current_steps": 36715, "total_steps": 38150, "loss": 0.0844, "lr": 2.1548629285480792e-07, "epoch": 9.623853211009175, "percentage": 96.24, "elapsed_time": "1:00:52", "remaining_time": "0:02:22", "throughput": 2695.35, "total_tokens": 9844456}
7363
+ {"current_steps": 36720, "total_steps": 38150, "loss": 0.0828, "lr": 2.1399044511146993e-07, "epoch": 9.625163826998689, "percentage": 96.25, "elapsed_time": "1:00:52", "remaining_time": "0:02:22", "throughput": 2695.41, "total_tokens": 9845944}
7364
+ {"current_steps": 36725, "total_steps": 38150, "loss": 0.133, "lr": 2.124997850371041e-07, "epoch": 9.626474442988204, "percentage": 96.26, "elapsed_time": "1:00:53", "remaining_time": "0:02:21", "throughput": 2695.48, "total_tokens": 9847400}
7365
+ {"current_steps": 36730, "total_steps": 38150, "loss": 0.1506, "lr": 2.1101431294370533e-07, "epoch": 9.62778505897772, "percentage": 96.28, "elapsed_time": "1:00:53", "remaining_time": "0:02:21", "throughput": 2695.46, "total_tokens": 9848440}
7366
+ {"current_steps": 36735, "total_steps": 38150, "loss": 0.141, "lr": 2.0953402914217768e-07, "epoch": 9.629095674967235, "percentage": 96.29, "elapsed_time": "1:00:54", "remaining_time": "0:02:20", "throughput": 2695.5, "total_tokens": 9849816}
7367
+ {"current_steps": 36740, "total_steps": 38150, "loss": 0.1377, "lr": 2.0805893394234e-07, "epoch": 9.63040629095675, "percentage": 96.3, "elapsed_time": "1:00:54", "remaining_time": "0:02:20", "throughput": 2695.57, "total_tokens": 9851304}
7368
+ {"current_steps": 36745, "total_steps": 38150, "loss": 0.0741, "lr": 2.065890276529342e-07, "epoch": 9.631716906946265, "percentage": 96.32, "elapsed_time": "1:00:55", "remaining_time": "0:02:19", "throughput": 2695.6, "total_tokens": 9852568}
7369
+ {"current_steps": 36750, "total_steps": 38150, "loss": 0.1489, "lr": 2.051243105816031e-07, "epoch": 9.63302752293578, "percentage": 96.33, "elapsed_time": "1:00:55", "remaining_time": "0:02:19", "throughput": 2695.67, "total_tokens": 9854056}
7370
+ {"current_steps": 36755, "total_steps": 38150, "loss": 0.0996, "lr": 2.0366478303491255e-07, "epoch": 9.634338138925294, "percentage": 96.34, "elapsed_time": "1:00:55", "remaining_time": "0:02:18", "throughput": 2695.69, "total_tokens": 9855240}
7371
+ {"current_steps": 36760, "total_steps": 38150, "loss": 0.1319, "lr": 2.0221044531834043e-07, "epoch": 9.63564875491481, "percentage": 96.36, "elapsed_time": "1:00:56", "remaining_time": "0:02:18", "throughput": 2695.74, "total_tokens": 9856760}
7372
+ {"current_steps": 36765, "total_steps": 38150, "loss": 0.1103, "lr": 2.0076129773627105e-07, "epoch": 9.636959370904325, "percentage": 96.37, "elapsed_time": "1:00:56", "remaining_time": "0:02:17", "throughput": 2695.82, "total_tokens": 9858360}
7373
+ {"current_steps": 36770, "total_steps": 38150, "loss": 0.1375, "lr": 1.9931734059201457e-07, "epoch": 9.63826998689384, "percentage": 96.38, "elapsed_time": "1:00:57", "remaining_time": "0:02:17", "throughput": 2695.85, "total_tokens": 9859624}
7374
+ {"current_steps": 36775, "total_steps": 38150, "loss": 0.0601, "lr": 1.9787857418778478e-07, "epoch": 9.639580602883354, "percentage": 96.4, "elapsed_time": "1:00:57", "remaining_time": "0:02:16", "throughput": 2695.88, "total_tokens": 9860856}
7375
+ {"current_steps": 36780, "total_steps": 38150, "loss": 0.1071, "lr": 1.9644499882471578e-07, "epoch": 9.64089121887287, "percentage": 96.41, "elapsed_time": "1:00:58", "remaining_time": "0:02:16", "throughput": 2695.94, "total_tokens": 9862296}
7376
+ {"current_steps": 36785, "total_steps": 38150, "loss": 0.1217, "lr": 1.9501661480285095e-07, "epoch": 9.642201834862385, "percentage": 96.42, "elapsed_time": "1:00:58", "remaining_time": "0:02:15", "throughput": 2696.01, "total_tokens": 9863784}
7377
+ {"current_steps": 36790, "total_steps": 38150, "loss": 0.1642, "lr": 1.935934224211483e-07, "epoch": 9.643512450851901, "percentage": 96.44, "elapsed_time": "1:00:59", "remaining_time": "0:02:15", "throughput": 2696.08, "total_tokens": 9865288}
7378
+ {"current_steps": 36795, "total_steps": 38150, "loss": 0.0517, "lr": 1.9217542197748074e-07, "epoch": 9.644823066841415, "percentage": 96.45, "elapsed_time": "1:00:59", "remaining_time": "0:02:14", "throughput": 2696.1, "total_tokens": 9866504}
7379
+ {"current_steps": 36800, "total_steps": 38150, "loss": 0.1616, "lr": 1.9076261376863303e-07, "epoch": 9.64613368283093, "percentage": 96.46, "elapsed_time": "1:00:59", "remaining_time": "0:02:14", "throughput": 2696.06, "total_tokens": 9867480}
7380
+ {"current_steps": 36805, "total_steps": 38150, "loss": 0.1558, "lr": 1.8935499809030476e-07, "epoch": 9.647444298820446, "percentage": 96.47, "elapsed_time": "1:01:00", "remaining_time": "0:02:13", "throughput": 2696.17, "total_tokens": 9869256}
7381
+ {"current_steps": 36810, "total_steps": 38150, "loss": 0.0872, "lr": 1.8795257523710197e-07, "epoch": 9.648754914809961, "percentage": 96.49, "elapsed_time": "1:01:01", "remaining_time": "0:02:13", "throughput": 2696.3, "total_tokens": 9871160}
7382
+ {"current_steps": 36815, "total_steps": 38150, "loss": 0.0954, "lr": 1.8655534550255648e-07, "epoch": 9.650065530799475, "percentage": 96.5, "elapsed_time": "1:01:01", "remaining_time": "0:02:12", "throughput": 2696.35, "total_tokens": 9872504}
7383
+ {"current_steps": 36820, "total_steps": 38150, "loss": 0.0781, "lr": 1.8516330917910108e-07, "epoch": 9.65137614678899, "percentage": 96.51, "elapsed_time": "1:01:01", "remaining_time": "0:02:12", "throughput": 2696.4, "total_tokens": 9873928}
7384
+ {"current_steps": 36825, "total_steps": 38150, "loss": 0.1718, "lr": 1.8377646655808877e-07, "epoch": 9.652686762778506, "percentage": 96.53, "elapsed_time": "1:01:02", "remaining_time": "0:02:11", "throughput": 2696.46, "total_tokens": 9875368}
7385
+ {"current_steps": 36830, "total_steps": 38150, "loss": 0.104, "lr": 1.8239481792978464e-07, "epoch": 9.653997378768022, "percentage": 96.54, "elapsed_time": "1:01:02", "remaining_time": "0:02:11", "throughput": 2696.49, "total_tokens": 9876584}
7386
+ {"current_steps": 36835, "total_steps": 38150, "loss": 0.0959, "lr": 1.8101836358336287e-07, "epoch": 9.655307994757536, "percentage": 96.55, "elapsed_time": "1:01:03", "remaining_time": "0:02:10", "throughput": 2696.63, "total_tokens": 9878584}
7387
+ {"current_steps": 36840, "total_steps": 38150, "loss": 0.1316, "lr": 1.7964710380691251e-07, "epoch": 9.656618610747051, "percentage": 96.57, "elapsed_time": "1:01:03", "remaining_time": "0:02:10", "throughput": 2696.57, "total_tokens": 9879432}
7388
+ {"current_steps": 36845, "total_steps": 38150, "loss": 0.1001, "lr": 1.782810388874373e-07, "epoch": 9.657929226736567, "percentage": 96.58, "elapsed_time": "1:01:04", "remaining_time": "0:02:09", "throughput": 2696.61, "total_tokens": 9880744}
7389
+ {"current_steps": 36850, "total_steps": 38150, "loss": 0.1628, "lr": 1.7692016911085295e-07, "epoch": 9.65923984272608, "percentage": 96.59, "elapsed_time": "1:01:04", "remaining_time": "0:02:09", "throughput": 2696.66, "total_tokens": 9882280}
7390
+ {"current_steps": 36855, "total_steps": 38150, "loss": 0.192, "lr": 1.7556449476198445e-07, "epoch": 9.660550458715596, "percentage": 96.61, "elapsed_time": "1:01:05", "remaining_time": "0:02:08", "throughput": 2696.73, "total_tokens": 9883816}
7391
+ {"current_steps": 36860, "total_steps": 38150, "loss": 0.1603, "lr": 1.7421401612457423e-07, "epoch": 9.661861074705111, "percentage": 96.62, "elapsed_time": "1:01:05", "remaining_time": "0:02:08", "throughput": 2696.84, "total_tokens": 9885512}
7392
+ {"current_steps": 36865, "total_steps": 38150, "loss": 0.0897, "lr": 1.7286873348127676e-07, "epoch": 9.663171690694627, "percentage": 96.63, "elapsed_time": "1:01:06", "remaining_time": "0:02:07", "throughput": 2696.83, "total_tokens": 9886600}
7393
+ {"current_steps": 36870, "total_steps": 38150, "loss": 0.0996, "lr": 1.715286471136529e-07, "epoch": 9.66448230668414, "percentage": 96.64, "elapsed_time": "1:01:06", "remaining_time": "0:02:07", "throughput": 2696.81, "total_tokens": 9887592}
7394
+ {"current_steps": 36875, "total_steps": 38150, "loss": 0.0913, "lr": 1.7019375730218667e-07, "epoch": 9.665792922673656, "percentage": 96.66, "elapsed_time": "1:01:06", "remaining_time": "0:02:06", "throughput": 2696.76, "total_tokens": 9888504}
7395
+ {"current_steps": 36880, "total_steps": 38150, "loss": 0.1361, "lr": 1.688640643262629e-07, "epoch": 9.667103538663172, "percentage": 96.67, "elapsed_time": "1:01:07", "remaining_time": "0:02:06", "throughput": 2696.77, "total_tokens": 9889688}
7396
+ {"current_steps": 36885, "total_steps": 38150, "loss": 0.2183, "lr": 1.6753956846418395e-07, "epoch": 9.668414154652687, "percentage": 96.68, "elapsed_time": "1:01:07", "remaining_time": "0:02:05", "throughput": 2696.85, "total_tokens": 9891304}
7397
+ {"current_steps": 36890, "total_steps": 38150, "loss": 0.1017, "lr": 1.6622026999316697e-07, "epoch": 9.669724770642201, "percentage": 96.7, "elapsed_time": "1:01:08", "remaining_time": "0:02:05", "throughput": 2696.97, "total_tokens": 9893064}
7398
+ {"current_steps": 36895, "total_steps": 38150, "loss": 0.1402, "lr": 1.6490616918933554e-07, "epoch": 9.671035386631717, "percentage": 96.71, "elapsed_time": "1:01:08", "remaining_time": "0:02:04", "throughput": 2697.06, "total_tokens": 9894680}
7399
+ {"current_steps": 36900, "total_steps": 38150, "loss": 0.129, "lr": 1.6359726632773353e-07, "epoch": 9.672346002621232, "percentage": 96.72, "elapsed_time": "1:01:09", "remaining_time": "0:02:04", "throughput": 2697.09, "total_tokens": 9896056}
7400
+ {"current_steps": 36905, "total_steps": 38150, "loss": 0.1671, "lr": 1.6229356168231125e-07, "epoch": 9.673656618610748, "percentage": 96.74, "elapsed_time": "1:01:09", "remaining_time": "0:02:03", "throughput": 2697.14, "total_tokens": 9897448}
7401
+ {"current_steps": 36910, "total_steps": 38150, "loss": 0.1519, "lr": 1.609950555259282e-07, "epoch": 9.674967234600262, "percentage": 96.75, "elapsed_time": "1:01:10", "remaining_time": "0:02:03", "throughput": 2697.24, "total_tokens": 9899128}
7402
+ {"current_steps": 36915, "total_steps": 38150, "loss": 0.1519, "lr": 1.5970174813036144e-07, "epoch": 9.676277850589777, "percentage": 96.76, "elapsed_time": "1:01:10", "remaining_time": "0:02:02", "throughput": 2697.29, "total_tokens": 9900552}
7403
+ {"current_steps": 36920, "total_steps": 38150, "loss": 0.1652, "lr": 1.5841363976629998e-07, "epoch": 9.677588466579293, "percentage": 96.78, "elapsed_time": "1:01:10", "remaining_time": "0:02:02", "throughput": 2697.31, "total_tokens": 9901784}
7404
+ {"current_steps": 36925, "total_steps": 38150, "loss": 0.1005, "lr": 1.5713073070333927e-07, "epoch": 9.678899082568808, "percentage": 96.79, "elapsed_time": "1:01:11", "remaining_time": "0:02:01", "throughput": 2697.31, "total_tokens": 9902968}
7405
+ {"current_steps": 36930, "total_steps": 38150, "loss": 0.14, "lr": 1.5585302120998956e-07, "epoch": 9.680209698558322, "percentage": 96.8, "elapsed_time": "1:01:11", "remaining_time": "0:02:01", "throughput": 2697.38, "total_tokens": 9904472}
7406
+ {"current_steps": 36935, "total_steps": 38150, "loss": 0.1206, "lr": 1.545805115536786e-07, "epoch": 9.681520314547837, "percentage": 96.82, "elapsed_time": "1:01:12", "remaining_time": "0:02:00", "throughput": 2697.4, "total_tokens": 9905768}
7407
+ {"current_steps": 36940, "total_steps": 38150, "loss": 0.11, "lr": 1.5331320200073497e-07, "epoch": 9.682830930537353, "percentage": 96.83, "elapsed_time": "1:01:12", "remaining_time": "0:02:00", "throughput": 2697.55, "total_tokens": 9907912}
7408
+ {"current_steps": 36945, "total_steps": 38150, "loss": 0.0552, "lr": 1.520510928164076e-07, "epoch": 9.684141546526867, "percentage": 96.84, "elapsed_time": "1:01:13", "remaining_time": "0:01:59", "throughput": 2697.54, "total_tokens": 9908984}
7409
+ {"current_steps": 36950, "total_steps": 38150, "loss": 0.1837, "lr": 1.5079418426485193e-07, "epoch": 9.685452162516382, "percentage": 96.85, "elapsed_time": "1:01:13", "remaining_time": "0:01:59", "throughput": 2697.56, "total_tokens": 9910216}
7410
+ {"current_steps": 36955, "total_steps": 38150, "loss": 0.257, "lr": 1.49542476609138e-07, "epoch": 9.686762778505898, "percentage": 96.87, "elapsed_time": "1:01:14", "remaining_time": "0:01:58", "throughput": 2697.58, "total_tokens": 9911544}
7411
+ {"current_steps": 36960, "total_steps": 38150, "loss": 0.1452, "lr": 1.482959701112452e-07, "epoch": 9.688073394495413, "percentage": 96.88, "elapsed_time": "1:01:14", "remaining_time": "0:01:58", "throughput": 2697.66, "total_tokens": 9913352}
7412
+ {"current_steps": 36965, "total_steps": 38150, "loss": 0.091, "lr": 1.47054665032062e-07, "epoch": 9.689384010484927, "percentage": 96.89, "elapsed_time": "1:01:15", "remaining_time": "0:01:57", "throughput": 2697.64, "total_tokens": 9914376}
7413
+ {"current_steps": 36970, "total_steps": 38150, "loss": 0.111, "lr": 1.4581856163140008e-07, "epoch": 9.690694626474443, "percentage": 96.91, "elapsed_time": "1:01:15", "remaining_time": "0:01:57", "throughput": 2697.62, "total_tokens": 9915448}
7414
+ {"current_steps": 36975, "total_steps": 38150, "loss": 0.1627, "lr": 1.4458766016796632e-07, "epoch": 9.692005242463958, "percentage": 96.92, "elapsed_time": "1:01:16", "remaining_time": "0:01:56", "throughput": 2697.59, "total_tokens": 9916936}
7415
+ {"current_steps": 36980, "total_steps": 38150, "loss": 0.1168, "lr": 1.4336196089938802e-07, "epoch": 9.693315858453474, "percentage": 96.93, "elapsed_time": "1:01:16", "remaining_time": "0:01:56", "throughput": 2697.62, "total_tokens": 9918200}
7416
+ {"current_steps": 36985, "total_steps": 38150, "loss": 0.1793, "lr": 1.4214146408220163e-07, "epoch": 9.694626474442988, "percentage": 96.95, "elapsed_time": "1:01:17", "remaining_time": "0:01:55", "throughput": 2697.77, "total_tokens": 9920120}
7417
+ {"current_steps": 36990, "total_steps": 38150, "loss": 0.1296, "lr": 1.4092616997185837e-07, "epoch": 9.695937090432503, "percentage": 96.96, "elapsed_time": "1:01:17", "remaining_time": "0:01:55", "throughput": 2697.8, "total_tokens": 9921464}
7418
+ {"current_steps": 36995, "total_steps": 38150, "loss": 0.2593, "lr": 1.397160788227131e-07, "epoch": 9.697247706422019, "percentage": 96.97, "elapsed_time": "1:01:18", "remaining_time": "0:01:54", "throughput": 2697.89, "total_tokens": 9923128}
7419
+ {"current_steps": 37000, "total_steps": 38150, "loss": 0.1757, "lr": 1.385111908880382e-07, "epoch": 9.698558322411534, "percentage": 96.99, "elapsed_time": "1:01:18", "remaining_time": "0:01:54", "throughput": 2697.92, "total_tokens": 9924408}
7420
+ {"current_steps": 37005, "total_steps": 38150, "loss": 0.1676, "lr": 1.3731150642001255e-07, "epoch": 9.699868938401048, "percentage": 97.0, "elapsed_time": "1:01:19", "remaining_time": "0:01:53", "throughput": 2697.99, "total_tokens": 9925912}
7421
+ {"current_steps": 37010, "total_steps": 38150, "loss": 0.1013, "lr": 1.3611702566972694e-07, "epoch": 9.701179554390563, "percentage": 97.01, "elapsed_time": "1:01:19", "remaining_time": "0:01:53", "throughput": 2697.98, "total_tokens": 9927000}
7422
+ {"current_steps": 37015, "total_steps": 38150, "loss": 0.1303, "lr": 1.3492774888718974e-07, "epoch": 9.702490170380079, "percentage": 97.02, "elapsed_time": "1:01:19", "remaining_time": "0:01:52", "throughput": 2697.94, "total_tokens": 9927992}
7423
+ {"current_steps": 37020, "total_steps": 38150, "loss": 0.2199, "lr": 1.3374367632131014e-07, "epoch": 9.703800786369595, "percentage": 97.04, "elapsed_time": "1:01:20", "remaining_time": "0:01:52", "throughput": 2698.02, "total_tokens": 9929448}
7424
+ {"current_steps": 37025, "total_steps": 38150, "loss": 0.1321, "lr": 1.3256480821991213e-07, "epoch": 9.705111402359108, "percentage": 97.05, "elapsed_time": "1:01:20", "remaining_time": "0:01:51", "throughput": 2698.06, "total_tokens": 9930808}
7425
+ {"current_steps": 37030, "total_steps": 38150, "loss": 0.1686, "lr": 1.3139114482973448e-07, "epoch": 9.706422018348624, "percentage": 97.06, "elapsed_time": "1:01:21", "remaining_time": "0:01:51", "throughput": 2698.11, "total_tokens": 9932232}
7426
+ {"current_steps": 37035, "total_steps": 38150, "loss": 0.1825, "lr": 1.3022268639641956e-07, "epoch": 9.70773263433814, "percentage": 97.08, "elapsed_time": "1:01:21", "remaining_time": "0:01:50", "throughput": 2698.15, "total_tokens": 9933656}
7427
+ {"current_steps": 37040, "total_steps": 38150, "loss": 0.1237, "lr": 1.290594331645245e-07, "epoch": 9.709043250327653, "percentage": 97.09, "elapsed_time": "1:01:22", "remaining_time": "0:01:50", "throughput": 2698.23, "total_tokens": 9935368}
7428
+ {"current_steps": 37045, "total_steps": 38150, "loss": 0.0451, "lr": 1.279013853775185e-07, "epoch": 9.710353866317169, "percentage": 97.1, "elapsed_time": "1:01:22", "remaining_time": "0:01:49", "throughput": 2698.2, "total_tokens": 9936376}
7429
+ {"current_steps": 37050, "total_steps": 38150, "loss": 0.1491, "lr": 1.267485432777743e-07, "epoch": 9.711664482306684, "percentage": 97.12, "elapsed_time": "1:01:23", "remaining_time": "0:01:49", "throughput": 2698.19, "total_tokens": 9937464}
7430
+ {"current_steps": 37055, "total_steps": 38150, "loss": 0.0893, "lr": 1.2560090710658223e-07, "epoch": 9.7129750982962, "percentage": 97.13, "elapsed_time": "1:01:23", "remaining_time": "0:01:48", "throughput": 2698.21, "total_tokens": 9938696}
7431
+ {"current_steps": 37060, "total_steps": 38150, "loss": 0.018, "lr": 1.2445847710414183e-07, "epoch": 9.714285714285714, "percentage": 97.14, "elapsed_time": "1:01:23", "remaining_time": "0:01:48", "throughput": 2698.18, "total_tokens": 9939656}
7432
+ {"current_steps": 37065, "total_steps": 38150, "loss": 0.1037, "lr": 1.233212535095618e-07, "epoch": 9.715596330275229, "percentage": 97.16, "elapsed_time": "1:01:24", "remaining_time": "0:01:47", "throughput": 2698.19, "total_tokens": 9940888}
7433
+ {"current_steps": 37070, "total_steps": 38150, "loss": 0.0728, "lr": 1.2218923656085735e-07, "epoch": 9.716906946264745, "percentage": 97.17, "elapsed_time": "1:01:24", "remaining_time": "0:01:47", "throughput": 2698.2, "total_tokens": 9942024}
7434
+ {"current_steps": 37075, "total_steps": 38150, "loss": 0.1347, "lr": 1.2106242649496112e-07, "epoch": 9.71821756225426, "percentage": 97.18, "elapsed_time": "1:01:25", "remaining_time": "0:01:46", "throughput": 2698.22, "total_tokens": 9943336}
7435
+ {"current_steps": 37080, "total_steps": 38150, "loss": 0.3417, "lr": 1.199408235477123e-07, "epoch": 9.719528178243774, "percentage": 97.2, "elapsed_time": "1:01:25", "remaining_time": "0:01:46", "throughput": 2698.33, "total_tokens": 9945096}
7436
+ {"current_steps": 37085, "total_steps": 38150, "loss": 0.1436, "lr": 1.188244279538564e-07, "epoch": 9.72083879423329, "percentage": 97.21, "elapsed_time": "1:01:26", "remaining_time": "0:01:45", "throughput": 2698.36, "total_tokens": 9946344}
7437
+ {"current_steps": 37090, "total_steps": 38150, "loss": 0.0855, "lr": 1.1771323994705929e-07, "epoch": 9.722149410222805, "percentage": 97.22, "elapsed_time": "1:01:26", "remaining_time": "0:01:45", "throughput": 2698.38, "total_tokens": 9947688}
7438
+ {"current_steps": 37095, "total_steps": 38150, "loss": 0.1388, "lr": 1.1660725975988773e-07, "epoch": 9.72346002621232, "percentage": 97.23, "elapsed_time": "1:01:27", "remaining_time": "0:01:44", "throughput": 2698.53, "total_tokens": 9949816}
7439
+ {"current_steps": 37100, "total_steps": 38150, "loss": 0.2345, "lr": 1.1550648762382044e-07, "epoch": 9.724770642201834, "percentage": 97.25, "elapsed_time": "1:01:27", "remaining_time": "0:01:44", "throughput": 2698.57, "total_tokens": 9951112}
7440
+ {"current_steps": 37105, "total_steps": 38150, "loss": 0.1289, "lr": 1.144109237692509e-07, "epoch": 9.72608125819135, "percentage": 97.26, "elapsed_time": "1:01:27", "remaining_time": "0:01:43", "throughput": 2698.59, "total_tokens": 9952312}
7441
+ {"current_steps": 37110, "total_steps": 38150, "loss": 0.1475, "lr": 1.1332056842547344e-07, "epoch": 9.727391874180865, "percentage": 97.27, "elapsed_time": "1:01:28", "remaining_time": "0:01:43", "throughput": 2698.58, "total_tokens": 9953448}
7442
+ {"current_steps": 37115, "total_steps": 38150, "loss": 0.1814, "lr": 1.1223542182070002e-07, "epoch": 9.728702490170381, "percentage": 97.29, "elapsed_time": "1:01:28", "remaining_time": "0:01:42", "throughput": 2698.66, "total_tokens": 9954936}
7443
+ {"current_steps": 37120, "total_steps": 38150, "loss": 0.2168, "lr": 1.1115548418205168e-07, "epoch": 9.730013106159895, "percentage": 97.3, "elapsed_time": "1:01:29", "remaining_time": "0:01:42", "throughput": 2698.69, "total_tokens": 9956216}
7444
+ {"current_steps": 37125, "total_steps": 38150, "loss": 0.1583, "lr": 1.1008075573555599e-07, "epoch": 9.73132372214941, "percentage": 97.31, "elapsed_time": "1:01:29", "remaining_time": "0:01:41", "throughput": 2698.7, "total_tokens": 9957416}
7445
+ {"current_steps": 37130, "total_steps": 38150, "loss": 0.0764, "lr": 1.0901123670614965e-07, "epoch": 9.732634338138926, "percentage": 97.33, "elapsed_time": "1:01:30", "remaining_time": "0:01:41", "throughput": 2698.69, "total_tokens": 9958504}
7446
+ {"current_steps": 37135, "total_steps": 38150, "loss": 0.1582, "lr": 1.0794692731768419e-07, "epoch": 9.73394495412844, "percentage": 97.34, "elapsed_time": "1:01:30", "remaining_time": "0:01:40", "throughput": 2698.72, "total_tokens": 9959784}
7447
+ {"current_steps": 37140, "total_steps": 38150, "loss": 0.2522, "lr": 1.0688782779291473e-07, "epoch": 9.735255570117955, "percentage": 97.35, "elapsed_time": "1:01:31", "remaining_time": "0:01:40", "throughput": 2698.82, "total_tokens": 9961576}
7448
+ {"current_steps": 37145, "total_steps": 38150, "loss": 0.0973, "lr": 1.0583393835351396e-07, "epoch": 9.73656618610747, "percentage": 97.37, "elapsed_time": "1:01:31", "remaining_time": "0:01:39", "throughput": 2698.88, "total_tokens": 9963032}
7449
+ {"current_steps": 37150, "total_steps": 38150, "loss": 0.076, "lr": 1.0478525922005545e-07, "epoch": 9.737876802096986, "percentage": 97.38, "elapsed_time": "1:01:31", "remaining_time": "0:01:39", "throughput": 2698.86, "total_tokens": 9964056}
7450
+ {"current_steps": 37155, "total_steps": 38150, "loss": 0.0456, "lr": 1.0374179061202749e-07, "epoch": 9.7391874180865, "percentage": 97.39, "elapsed_time": "1:01:32", "remaining_time": "0:01:38", "throughput": 2698.89, "total_tokens": 9965368}
7451
+ {"current_steps": 37160, "total_steps": 38150, "loss": 0.1874, "lr": 1.0270353274782207e-07, "epoch": 9.740498034076015, "percentage": 97.4, "elapsed_time": "1:01:32", "remaining_time": "0:01:38", "throughput": 2698.91, "total_tokens": 9966680}
7452
+ {"current_steps": 37165, "total_steps": 38150, "loss": 0.1056, "lr": 1.0167048584475147e-07, "epoch": 9.741808650065531, "percentage": 97.42, "elapsed_time": "1:01:33", "remaining_time": "0:01:37", "throughput": 2698.91, "total_tokens": 9967864}
7453
+ {"current_steps": 37170, "total_steps": 38150, "loss": 0.1259, "lr": 1.006426501190233e-07, "epoch": 9.743119266055047, "percentage": 97.43, "elapsed_time": "1:01:33", "remaining_time": "0:01:37", "throughput": 2698.99, "total_tokens": 9969448}
7454
+ {"current_steps": 37175, "total_steps": 38150, "loss": 0.0991, "lr": 9.962002578576823e-08, "epoch": 9.74442988204456, "percentage": 97.44, "elapsed_time": "1:01:34", "remaining_time": "0:01:36", "throughput": 2699.01, "total_tokens": 9970712}
7455
+ {"current_steps": 37180, "total_steps": 38150, "loss": 0.1242, "lr": 9.860261305901785e-08, "epoch": 9.745740498034076, "percentage": 97.46, "elapsed_time": "1:01:34", "remaining_time": "0:01:36", "throughput": 2698.98, "total_tokens": 9971736}
7456
+ {"current_steps": 37185, "total_steps": 38150, "loss": 0.1056, "lr": 9.759041215171295e-08, "epoch": 9.747051114023591, "percentage": 97.47, "elapsed_time": "1:01:35", "remaining_time": "0:01:35", "throughput": 2698.98, "total_tokens": 9972872}
7457
+ {"current_steps": 37190, "total_steps": 38150, "loss": 0.2933, "lr": 9.658342327570902e-08, "epoch": 9.748361730013107, "percentage": 97.48, "elapsed_time": "1:01:35", "remaining_time": "0:01:35", "throughput": 2698.93, "total_tokens": 9973784}
7458
+ {"current_steps": 37195, "total_steps": 38150, "loss": 0.0825, "lr": 9.55816466417625e-08, "epoch": 9.74967234600262, "percentage": 97.5, "elapsed_time": "1:01:35", "remaining_time": "0:01:34", "throughput": 2698.92, "total_tokens": 9974840}
7459
+ {"current_steps": 37200, "total_steps": 38150, "loss": 0.1526, "lr": 9.458508245954456e-08, "epoch": 9.750982961992136, "percentage": 97.51, "elapsed_time": "1:01:36", "remaining_time": "0:01:34", "throughput": 2698.94, "total_tokens": 9976088}
7460
+ {"current_steps": 37205, "total_steps": 38150, "loss": 0.1379, "lr": 9.359373093763835e-08, "epoch": 9.752293577981652, "percentage": 97.52, "elapsed_time": "1:01:36", "remaining_time": "0:01:33", "throughput": 2698.98, "total_tokens": 9977432}
7461
+ {"current_steps": 37210, "total_steps": 38150, "loss": 0.189, "lr": 9.260759228352789e-08, "epoch": 9.753604193971167, "percentage": 97.54, "elapsed_time": "1:01:37", "remaining_time": "0:01:33", "throughput": 2698.95, "total_tokens": 9978392}
7462
+ {"current_steps": 37215, "total_steps": 38150, "loss": 0.2042, "lr": 9.162666670361198e-08, "epoch": 9.754914809960681, "percentage": 97.55, "elapsed_time": "1:01:37", "remaining_time": "0:01:32", "throughput": 2699.04, "total_tokens": 9980024}
7463
+ {"current_steps": 37220, "total_steps": 38150, "loss": 0.1817, "lr": 9.065095440319582e-08, "epoch": 9.756225425950197, "percentage": 97.56, "elapsed_time": "1:01:38", "remaining_time": "0:01:32", "throughput": 2699.08, "total_tokens": 9981416}
7464
+ {"current_steps": 37225, "total_steps": 38150, "loss": 0.0804, "lr": 8.968045558649663e-08, "epoch": 9.757536041939712, "percentage": 97.58, "elapsed_time": "1:01:38", "remaining_time": "0:01:31", "throughput": 2699.11, "total_tokens": 9982664}
7465
+ {"current_steps": 37230, "total_steps": 38150, "loss": 0.081, "lr": 8.871517045663524e-08, "epoch": 9.758846657929226, "percentage": 97.59, "elapsed_time": "1:01:38", "remaining_time": "0:01:31", "throughput": 2699.16, "total_tokens": 9984040}
7466
+ {"current_steps": 37235, "total_steps": 38150, "loss": 0.1813, "lr": 8.775509921564728e-08, "epoch": 9.760157273918741, "percentage": 97.6, "elapsed_time": "1:01:39", "remaining_time": "0:01:30", "throughput": 2699.19, "total_tokens": 9985400}
7467
+ {"current_steps": 37240, "total_steps": 38150, "loss": 0.2269, "lr": 8.680024206446924e-08, "epoch": 9.761467889908257, "percentage": 97.61, "elapsed_time": "1:01:39", "remaining_time": "0:01:30", "throughput": 2699.2, "total_tokens": 9986584}
7468
+ {"current_steps": 37245, "total_steps": 38150, "loss": 0.1774, "lr": 8.585059920295512e-08, "epoch": 9.762778505897773, "percentage": 97.63, "elapsed_time": "1:01:40", "remaining_time": "0:01:29", "throughput": 2699.26, "total_tokens": 9988088}
7469
+ {"current_steps": 37250, "total_steps": 38150, "loss": 0.1763, "lr": 8.490617082985986e-08, "epoch": 9.764089121887286, "percentage": 97.64, "elapsed_time": "1:01:40", "remaining_time": "0:01:29", "throughput": 2699.25, "total_tokens": 9989192}
7470
+ {"current_steps": 37255, "total_steps": 38150, "loss": 0.1137, "lr": 8.396695714285585e-08, "epoch": 9.765399737876802, "percentage": 97.65, "elapsed_time": "1:01:41", "remaining_time": "0:01:28", "throughput": 2699.27, "total_tokens": 9990392}
7471
+ {"current_steps": 37260, "total_steps": 38150, "loss": 0.1553, "lr": 8.303295833851365e-08, "epoch": 9.766710353866317, "percentage": 97.67, "elapsed_time": "1:01:41", "remaining_time": "0:01:28", "throughput": 2699.29, "total_tokens": 9991608}
7472
+ {"current_steps": 37265, "total_steps": 38150, "loss": 0.1568, "lr": 8.210417461232412e-08, "epoch": 9.768020969855833, "percentage": 97.68, "elapsed_time": "1:01:42", "remaining_time": "0:01:27", "throughput": 2699.32, "total_tokens": 9992968}
7473
+ {"current_steps": 37270, "total_steps": 38150, "loss": 0.1167, "lr": 8.11806061586734e-08, "epoch": 9.769331585845347, "percentage": 97.69, "elapsed_time": "1:01:42", "remaining_time": "0:01:27", "throughput": 2699.36, "total_tokens": 9994280}
7474
+ {"current_steps": 37275, "total_steps": 38150, "loss": 0.1431, "lr": 8.026225317086522e-08, "epoch": 9.770642201834862, "percentage": 97.71, "elapsed_time": "1:01:42", "remaining_time": "0:01:26", "throughput": 2699.41, "total_tokens": 9995736}
7475
+ {"current_steps": 37280, "total_steps": 38150, "loss": 0.1556, "lr": 7.934911584110971e-08, "epoch": 9.771952817824378, "percentage": 97.72, "elapsed_time": "1:01:43", "remaining_time": "0:01:26", "throughput": 2699.45, "total_tokens": 9997064}
7476
+ {"current_steps": 37285, "total_steps": 38150, "loss": 0.0886, "lr": 7.844119436052622e-08, "epoch": 9.773263433813893, "percentage": 97.73, "elapsed_time": "1:01:43", "remaining_time": "0:01:25", "throughput": 2699.43, "total_tokens": 9998072}
7477
+ {"current_steps": 37290, "total_steps": 38150, "loss": 0.1376, "lr": 7.753848891913772e-08, "epoch": 9.774574049803407, "percentage": 97.75, "elapsed_time": "1:01:44", "remaining_time": "0:01:25", "throughput": 2699.48, "total_tokens": 9999416}
7478
+ {"current_steps": 37295, "total_steps": 38150, "loss": 0.1149, "lr": 7.664099970588201e-08, "epoch": 9.775884665792923, "percentage": 97.76, "elapsed_time": "1:01:44", "remaining_time": "0:01:24", "throughput": 2699.49, "total_tokens": 10000648}
7479
+ {"current_steps": 37300, "total_steps": 38150, "loss": 0.0411, "lr": 7.57487269085977e-08, "epoch": 9.777195281782438, "percentage": 97.77, "elapsed_time": "1:01:45", "remaining_time": "0:01:24", "throughput": 2699.43, "total_tokens": 10001544}
7480
+ {"current_steps": 37305, "total_steps": 38150, "loss": 0.0709, "lr": 7.486167071404371e-08, "epoch": 9.778505897771954, "percentage": 97.79, "elapsed_time": "1:01:45", "remaining_time": "0:01:23", "throughput": 2699.42, "total_tokens": 10002648}
7481
+ {"current_steps": 37310, "total_steps": 38150, "loss": 0.1286, "lr": 7.397983130787156e-08, "epoch": 9.779816513761467, "percentage": 97.8, "elapsed_time": "1:01:45", "remaining_time": "0:01:23", "throughput": 2699.46, "total_tokens": 10004024}
7482
+ {"current_steps": 37315, "total_steps": 38150, "loss": 0.1637, "lr": 7.310320887465305e-08, "epoch": 9.781127129750983, "percentage": 97.81, "elapsed_time": "1:01:46", "remaining_time": "0:01:22", "throughput": 2699.47, "total_tokens": 10005288}
7483
+ {"current_steps": 37320, "total_steps": 38150, "loss": 0.1608, "lr": 7.223180359786086e-08, "epoch": 9.782437745740499, "percentage": 97.82, "elapsed_time": "1:01:46", "remaining_time": "0:01:22", "throughput": 2699.51, "total_tokens": 10006600}
7484
+ {"current_steps": 37325, "total_steps": 38150, "loss": 0.156, "lr": 7.136561565988242e-08, "epoch": 9.783748361730012, "percentage": 97.84, "elapsed_time": "1:01:47", "remaining_time": "0:01:21", "throughput": 2699.62, "total_tokens": 10008328}
7485
+ {"current_steps": 37330, "total_steps": 38150, "loss": 0.1212, "lr": 7.050464524200607e-08, "epoch": 9.785058977719528, "percentage": 97.85, "elapsed_time": "1:01:47", "remaining_time": "0:01:21", "throughput": 2699.64, "total_tokens": 10009528}
7486
+ {"current_steps": 37335, "total_steps": 38150, "loss": 0.0843, "lr": 6.964889252443485e-08, "epoch": 9.786369593709043, "percentage": 97.86, "elapsed_time": "1:01:48", "remaining_time": "0:01:20", "throughput": 2699.68, "total_tokens": 10010920}
7487
+ {"current_steps": 37340, "total_steps": 38150, "loss": 0.2713, "lr": 6.879835768627274e-08, "epoch": 9.787680209698559, "percentage": 97.88, "elapsed_time": "1:01:48", "remaining_time": "0:01:20", "throughput": 2699.78, "total_tokens": 10012584}
7488
+ {"current_steps": 37345, "total_steps": 38150, "loss": 0.1341, "lr": 6.795304090553567e-08, "epoch": 9.788990825688073, "percentage": 97.89, "elapsed_time": "1:01:49", "remaining_time": "0:01:19", "throughput": 2699.81, "total_tokens": 10013864}
7489
+ {"current_steps": 37350, "total_steps": 38150, "loss": 0.1896, "lr": 6.711294235914877e-08, "epoch": 9.790301441677588, "percentage": 97.9, "elapsed_time": "1:01:49", "remaining_time": "0:01:19", "throughput": 2699.96, "total_tokens": 10016008}
7490
+ {"current_steps": 37355, "total_steps": 38150, "loss": 0.0877, "lr": 6.627806222294086e-08, "epoch": 9.791612057667104, "percentage": 97.92, "elapsed_time": "1:01:50", "remaining_time": "0:01:18", "throughput": 2699.97, "total_tokens": 10017176}
7491
+ {"current_steps": 37360, "total_steps": 38150, "loss": 0.0864, "lr": 6.544840067165548e-08, "epoch": 9.79292267365662, "percentage": 97.93, "elapsed_time": "1:01:50", "remaining_time": "0:01:18", "throughput": 2700.02, "total_tokens": 10018584}
7492
+ {"current_steps": 37365, "total_steps": 38150, "loss": 0.1402, "lr": 6.462395787893427e-08, "epoch": 9.794233289646133, "percentage": 97.94, "elapsed_time": "1:01:51", "remaining_time": "0:01:17", "throughput": 2700.13, "total_tokens": 10020392}
7493
+ {"current_steps": 37370, "total_steps": 38150, "loss": 0.1691, "lr": 6.380473401733366e-08, "epoch": 9.795543905635649, "percentage": 97.96, "elapsed_time": "1:01:51", "remaining_time": "0:01:17", "throughput": 2700.27, "total_tokens": 10022424}
7494
+ {"current_steps": 37375, "total_steps": 38150, "loss": 0.1082, "lr": 6.299072925831373e-08, "epoch": 9.796854521625164, "percentage": 97.97, "elapsed_time": "1:01:52", "remaining_time": "0:01:16", "throughput": 2700.39, "total_tokens": 10024296}
7495
+ {"current_steps": 37380, "total_steps": 38150, "loss": 0.1706, "lr": 6.218194377224928e-08, "epoch": 9.79816513761468, "percentage": 97.98, "elapsed_time": "1:01:52", "remaining_time": "0:01:16", "throughput": 2700.37, "total_tokens": 10025304}
7496
+ {"current_steps": 37385, "total_steps": 38150, "loss": 0.0489, "lr": 6.137837772841326e-08, "epoch": 9.799475753604193, "percentage": 97.99, "elapsed_time": "1:01:52", "remaining_time": "0:01:15", "throughput": 2700.34, "total_tokens": 10026344}
7497
+ {"current_steps": 37390, "total_steps": 38150, "loss": 0.1299, "lr": 6.058003129499334e-08, "epoch": 9.800786369593709, "percentage": 98.01, "elapsed_time": "1:01:53", "remaining_time": "0:01:15", "throughput": 2700.35, "total_tokens": 10027464}
7498
+ {"current_steps": 37395, "total_steps": 38150, "loss": 0.1219, "lr": 5.978690463908088e-08, "epoch": 9.802096985583225, "percentage": 98.02, "elapsed_time": "1:01:53", "remaining_time": "0:01:14", "throughput": 2700.37, "total_tokens": 10028680}
7499
+ {"current_steps": 37400, "total_steps": 38150, "loss": 0.1695, "lr": 5.8998997926676405e-08, "epoch": 9.80340760157274, "percentage": 98.03, "elapsed_time": "1:01:54", "remaining_time": "0:01:14", "throughput": 2700.39, "total_tokens": 10029928}
7500
+ {"current_steps": 37405, "total_steps": 38150, "loss": 0.125, "lr": 5.821631132268412e-08, "epoch": 9.804718217562254, "percentage": 98.05, "elapsed_time": "1:01:54", "remaining_time": "0:01:13", "throughput": 2700.41, "total_tokens": 10031208}
7501
+ {"current_steps": 37410, "total_steps": 38150, "loss": 0.0834, "lr": 5.743884499092578e-08, "epoch": 9.80602883355177, "percentage": 98.06, "elapsed_time": "1:01:55", "remaining_time": "0:01:13", "throughput": 2700.37, "total_tokens": 10032136}
7502
+ {"current_steps": 37415, "total_steps": 38150, "loss": 0.1099, "lr": 5.6666599094115646e-08, "epoch": 9.807339449541285, "percentage": 98.07, "elapsed_time": "1:01:55", "remaining_time": "0:01:12", "throughput": 2700.35, "total_tokens": 10033176}
7503
+ {"current_steps": 37420, "total_steps": 38150, "loss": 0.1168, "lr": 5.58995737938911e-08, "epoch": 9.808650065530799, "percentage": 98.09, "elapsed_time": "1:01:55", "remaining_time": "0:01:12", "throughput": 2700.35, "total_tokens": 10034280}
7504
+ {"current_steps": 37425, "total_steps": 38150, "loss": 0.1163, "lr": 5.513776925078207e-08, "epoch": 9.809960681520314, "percentage": 98.1, "elapsed_time": "1:01:56", "remaining_time": "0:01:11", "throughput": 2700.45, "total_tokens": 10035960}
7505
+ {"current_steps": 37430, "total_steps": 38150, "loss": 0.0485, "lr": 5.4381185624238776e-08, "epoch": 9.81127129750983, "percentage": 98.11, "elapsed_time": "1:01:56", "remaining_time": "0:01:11", "throughput": 2700.43, "total_tokens": 10036984}
7506
+ {"current_steps": 37435, "total_steps": 38150, "loss": 0.1255, "lr": 5.362982307261233e-08, "epoch": 9.812581913499345, "percentage": 98.13, "elapsed_time": "1:01:57", "remaining_time": "0:01:10", "throughput": 2700.4, "total_tokens": 10038008}
7507
+ {"current_steps": 37440, "total_steps": 38150, "loss": 0.1374, "lr": 5.2883681753157497e-08, "epoch": 9.813892529488859, "percentage": 98.14, "elapsed_time": "1:01:57", "remaining_time": "0:01:10", "throughput": 2700.39, "total_tokens": 10039112}
7508
+ {"current_steps": 37445, "total_steps": 38150, "loss": 0.1028, "lr": 5.214276182204381e-08, "epoch": 9.815203145478375, "percentage": 98.15, "elapsed_time": "1:01:58", "remaining_time": "0:01:10", "throughput": 2700.38, "total_tokens": 10040216}
7509
+ {"current_steps": 37450, "total_steps": 38150, "loss": 0.1403, "lr": 5.140706343434165e-08, "epoch": 9.81651376146789, "percentage": 98.17, "elapsed_time": "1:01:58", "remaining_time": "0:01:09", "throughput": 2700.4, "total_tokens": 10041528}
7510
+ {"current_steps": 37455, "total_steps": 38150, "loss": 0.1066, "lr": 5.0676586744036194e-08, "epoch": 9.817824377457406, "percentage": 98.18, "elapsed_time": "1:01:58", "remaining_time": "0:01:09", "throughput": 2700.39, "total_tokens": 10042600}
7511
+ {"current_steps": 37460, "total_steps": 38150, "loss": 0.158, "lr": 4.9951331904007915e-08, "epoch": 9.81913499344692, "percentage": 98.19, "elapsed_time": "1:01:59", "remaining_time": "0:01:08", "throughput": 2700.43, "total_tokens": 10043960}
7512
+ {"current_steps": 37465, "total_steps": 38150, "loss": 0.1343, "lr": 4.923129906606039e-08, "epoch": 9.820445609436435, "percentage": 98.2, "elapsed_time": "1:01:59", "remaining_time": "0:01:08", "throughput": 2700.43, "total_tokens": 10045080}
7513
+ {"current_steps": 37470, "total_steps": 38150, "loss": 0.1046, "lr": 4.851648838088696e-08, "epoch": 9.82175622542595, "percentage": 98.22, "elapsed_time": "1:02:00", "remaining_time": "0:01:07", "throughput": 2700.48, "total_tokens": 10046440}
7514
+ {"current_steps": 37475, "total_steps": 38150, "loss": 0.1237, "lr": 4.7806899998101283e-08, "epoch": 9.823066841415466, "percentage": 98.23, "elapsed_time": "1:02:00", "remaining_time": "0:01:07", "throughput": 2700.44, "total_tokens": 10047416}
7515
+ {"current_steps": 37480, "total_steps": 38150, "loss": 0.1087, "lr": 4.71025340662179e-08, "epoch": 9.82437745740498, "percentage": 98.24, "elapsed_time": "1:02:01", "remaining_time": "0:01:06", "throughput": 2700.46, "total_tokens": 10048648}
7516
+ {"current_steps": 37485, "total_steps": 38150, "loss": 0.1663, "lr": 4.6403390732654985e-08, "epoch": 9.825688073394495, "percentage": 98.26, "elapsed_time": "1:02:01", "remaining_time": "0:01:06", "throughput": 2700.49, "total_tokens": 10050008}
7517
+ {"current_steps": 37490, "total_steps": 38150, "loss": 0.1559, "lr": 4.570947014374827e-08, "epoch": 9.82699868938401, "percentage": 98.27, "elapsed_time": "1:02:02", "remaining_time": "0:01:05", "throughput": 2700.6, "total_tokens": 10051752}
7518
+ {"current_steps": 37495, "total_steps": 38150, "loss": 0.184, "lr": 4.502077244473435e-08, "epoch": 9.828309305373526, "percentage": 98.28, "elapsed_time": "1:02:02", "remaining_time": "0:01:05", "throughput": 2700.61, "total_tokens": 10053032}
7519
+ {"current_steps": 37500, "total_steps": 38150, "loss": 0.0667, "lr": 4.4337297779750705e-08, "epoch": 9.82961992136304, "percentage": 98.3, "elapsed_time": "1:02:02", "remaining_time": "0:01:04", "throughput": 2700.6, "total_tokens": 10054104}
7520
+ {"current_steps": 37505, "total_steps": 38150, "loss": 0.0723, "lr": 4.365904629185236e-08, "epoch": 9.830930537352556, "percentage": 98.31, "elapsed_time": "1:02:03", "remaining_time": "0:01:04", "throughput": 2700.6, "total_tokens": 10055288}
7521
+ {"current_steps": 37510, "total_steps": 38150, "loss": 0.1856, "lr": 4.298601812299241e-08, "epoch": 9.832241153342071, "percentage": 98.32, "elapsed_time": "1:02:03", "remaining_time": "0:01:03", "throughput": 2700.68, "total_tokens": 10056808}
7522
+ {"current_steps": 37515, "total_steps": 38150, "loss": 0.1168, "lr": 4.2318213414038745e-08, "epoch": 9.833551769331585, "percentage": 98.34, "elapsed_time": "1:02:04", "remaining_time": "0:01:03", "throughput": 2700.68, "total_tokens": 10057960}
7523
+ {"current_steps": 37520, "total_steps": 38150, "loss": 0.1311, "lr": 4.1655632304757334e-08, "epoch": 9.8348623853211, "percentage": 98.35, "elapsed_time": "1:02:04", "remaining_time": "0:01:02", "throughput": 2700.75, "total_tokens": 10059544}
7524
+ {"current_steps": 37525, "total_steps": 38150, "loss": 0.1012, "lr": 4.0998274933828905e-08, "epoch": 9.836173001310616, "percentage": 98.36, "elapsed_time": "1:02:05", "remaining_time": "0:01:02", "throughput": 2700.75, "total_tokens": 10060680}
7525
+ {"current_steps": 37530, "total_steps": 38150, "loss": 0.1466, "lr": 4.034614143883508e-08, "epoch": 9.837483617300132, "percentage": 98.37, "elapsed_time": "1:02:05", "remaining_time": "0:01:01", "throughput": 2700.76, "total_tokens": 10061848}
7526
+ {"current_steps": 37535, "total_steps": 38150, "loss": 0.1198, "lr": 3.969923195626668e-08, "epoch": 9.838794233289645, "percentage": 98.39, "elapsed_time": "1:02:05", "remaining_time": "0:01:01", "throughput": 2700.82, "total_tokens": 10063224}
7527
+ {"current_steps": 37540, "total_steps": 38150, "loss": 0.0972, "lr": 3.9057546621520946e-08, "epoch": 9.840104849279161, "percentage": 98.4, "elapsed_time": "1:02:06", "remaining_time": "0:01:00", "throughput": 2700.87, "total_tokens": 10064728}
7528
+ {"current_steps": 37545, "total_steps": 38150, "loss": 0.0511, "lr": 3.842108556890156e-08, "epoch": 9.841415465268676, "percentage": 98.41, "elapsed_time": "1:02:06", "remaining_time": "0:01:00", "throughput": 2700.87, "total_tokens": 10065832}
7529
+ {"current_steps": 37550, "total_steps": 38150, "loss": 0.0977, "lr": 3.778984893161863e-08, "epoch": 9.842726081258192, "percentage": 98.43, "elapsed_time": "1:02:07", "remaining_time": "0:00:59", "throughput": 2700.9, "total_tokens": 10067112}
7530
+ {"current_steps": 37555, "total_steps": 38150, "loss": 0.1898, "lr": 3.7163836841791476e-08, "epoch": 9.844036697247706, "percentage": 98.44, "elapsed_time": "1:02:07", "remaining_time": "0:00:59", "throughput": 2700.89, "total_tokens": 10068264}
7531
+ {"current_steps": 37560, "total_steps": 38150, "loss": 0.1518, "lr": 3.654304943043752e-08, "epoch": 9.845347313237221, "percentage": 98.45, "elapsed_time": "1:02:08", "remaining_time": "0:00:58", "throughput": 2700.9, "total_tokens": 10069448}
7532
+ {"current_steps": 37565, "total_steps": 38150, "loss": 0.1902, "lr": 3.5927486827491696e-08, "epoch": 9.846657929226737, "percentage": 98.47, "elapsed_time": "1:02:08", "remaining_time": "0:00:58", "throughput": 2700.95, "total_tokens": 10070776}
7533
+ {"current_steps": 37570, "total_steps": 38150, "loss": 0.0726, "lr": 3.531714916178708e-08, "epoch": 9.847968545216252, "percentage": 98.48, "elapsed_time": "1:02:09", "remaining_time": "0:00:57", "throughput": 2700.95, "total_tokens": 10071928}
7534
+ {"current_steps": 37575, "total_steps": 38150, "loss": 0.1119, "lr": 3.4712036561068693e-08, "epoch": 9.849279161205766, "percentage": 98.49, "elapsed_time": "1:02:09", "remaining_time": "0:00:57", "throughput": 2700.91, "total_tokens": 10072888}
7535
+ {"current_steps": 37580, "total_steps": 38150, "loss": 0.0553, "lr": 3.4112149151982466e-08, "epoch": 9.850589777195282, "percentage": 98.51, "elapsed_time": "1:02:09", "remaining_time": "0:00:56", "throughput": 2700.9, "total_tokens": 10074008}
7536
+ {"current_steps": 37585, "total_steps": 38150, "loss": 0.1489, "lr": 3.351748706008628e-08, "epoch": 9.851900393184797, "percentage": 98.52, "elapsed_time": "1:02:10", "remaining_time": "0:00:56", "throughput": 2700.97, "total_tokens": 10075544}
7537
+ {"current_steps": 37590, "total_steps": 38150, "loss": 0.1933, "lr": 3.292805040984171e-08, "epoch": 9.853211009174313, "percentage": 98.53, "elapsed_time": "1:02:10", "remaining_time": "0:00:55", "throughput": 2701.01, "total_tokens": 10076920}
7538
+ {"current_steps": 37595, "total_steps": 38150, "loss": 0.1413, "lr": 3.234383932461671e-08, "epoch": 9.854521625163827, "percentage": 98.55, "elapsed_time": "1:02:11", "remaining_time": "0:00:55", "throughput": 2701.08, "total_tokens": 10078456}
7539
+ {"current_steps": 37600, "total_steps": 38150, "loss": 0.1613, "lr": 3.176485392668571e-08, "epoch": 9.855832241153342, "percentage": 98.56, "elapsed_time": "1:02:11", "remaining_time": "0:00:54", "throughput": 2701.15, "total_tokens": 10079896}
7540
+ {"current_steps": 37605, "total_steps": 38150, "loss": 0.1368, "lr": 3.119109433722955e-08, "epoch": 9.857142857142858, "percentage": 98.57, "elapsed_time": "1:02:12", "remaining_time": "0:00:54", "throughput": 2701.23, "total_tokens": 10081512}
7541
+ {"current_steps": 37610, "total_steps": 38150, "loss": 0.306, "lr": 3.0622560676332734e-08, "epoch": 9.858453473132371, "percentage": 98.58, "elapsed_time": "1:02:12", "remaining_time": "0:00:53", "throughput": 2701.31, "total_tokens": 10083096}
7542
+ {"current_steps": 37615, "total_steps": 38150, "loss": 0.1717, "lr": 3.005925306299173e-08, "epoch": 9.859764089121887, "percentage": 98.6, "elapsed_time": "1:02:13", "remaining_time": "0:00:53", "throughput": 2701.36, "total_tokens": 10084632}
7543
+ {"current_steps": 37620, "total_steps": 38150, "loss": 0.1314, "lr": 2.9501171615103907e-08, "epoch": 9.861074705111402, "percentage": 98.61, "elapsed_time": "1:02:13", "remaining_time": "0:00:52", "throughput": 2701.43, "total_tokens": 10086152}
7544
+ {"current_steps": 37625, "total_steps": 38150, "loss": 0.1858, "lr": 2.8948316449473044e-08, "epoch": 9.862385321100918, "percentage": 98.62, "elapsed_time": "1:02:14", "remaining_time": "0:00:52", "throughput": 2701.52, "total_tokens": 10087864}
7545
+ {"current_steps": 37630, "total_steps": 38150, "loss": 0.1576, "lr": 2.840068768181492e-08, "epoch": 9.863695937090432, "percentage": 98.64, "elapsed_time": "1:02:14", "remaining_time": "0:00:51", "throughput": 2701.56, "total_tokens": 10089288}
7546
+ {"current_steps": 37635, "total_steps": 38150, "loss": 0.1762, "lr": 2.785828542674618e-08, "epoch": 9.865006553079947, "percentage": 98.65, "elapsed_time": "1:02:15", "remaining_time": "0:00:51", "throughput": 2701.6, "total_tokens": 10090552}
7547
+ {"current_steps": 37640, "total_steps": 38150, "loss": 0.146, "lr": 2.7321109797787125e-08, "epoch": 9.866317169069463, "percentage": 98.66, "elapsed_time": "1:02:15", "remaining_time": "0:00:50", "throughput": 2701.62, "total_tokens": 10091944}
7548
+ {"current_steps": 37645, "total_steps": 38150, "loss": 0.1375, "lr": 2.6789160907372822e-08, "epoch": 9.867627785058978, "percentage": 98.68, "elapsed_time": "1:02:15", "remaining_time": "0:00:50", "throughput": 2701.68, "total_tokens": 10093400}
7549
+ {"current_steps": 37650, "total_steps": 38150, "loss": 0.1358, "lr": 2.626243886683366e-08, "epoch": 9.868938401048492, "percentage": 98.69, "elapsed_time": "1:02:16", "remaining_time": "0:00:49", "throughput": 2701.68, "total_tokens": 10094536}
7550
+ {"current_steps": 37655, "total_steps": 38150, "loss": 0.1291, "lr": 2.574094378641756e-08, "epoch": 9.870249017038008, "percentage": 98.7, "elapsed_time": "1:02:16", "remaining_time": "0:00:49", "throughput": 2701.7, "total_tokens": 10095736}
7551
+ {"current_steps": 37660, "total_steps": 38150, "loss": 0.2504, "lr": 2.5224675775270544e-08, "epoch": 9.871559633027523, "percentage": 98.72, "elapsed_time": "1:02:17", "remaining_time": "0:00:48", "throughput": 2701.71, "total_tokens": 10096888}
7552
+ {"current_steps": 37665, "total_steps": 38150, "loss": 0.0533, "lr": 2.4713634941442298e-08, "epoch": 9.872870249017039, "percentage": 98.73, "elapsed_time": "1:02:17", "remaining_time": "0:00:48", "throughput": 2701.72, "total_tokens": 10098072}
7553
+ {"current_steps": 37670, "total_steps": 38150, "loss": 0.2349, "lr": 2.4207821391900032e-08, "epoch": 9.874180865006553, "percentage": 98.74, "elapsed_time": "1:02:18", "remaining_time": "0:00:47", "throughput": 2701.82, "total_tokens": 10099832}
7554
+ {"current_steps": 37675, "total_steps": 38150, "loss": 0.059, "lr": 2.370723523250351e-08, "epoch": 9.875491480996068, "percentage": 98.75, "elapsed_time": "1:02:18", "remaining_time": "0:00:47", "throughput": 2701.89, "total_tokens": 10101352}
7555
+ {"current_steps": 37680, "total_steps": 38150, "loss": 0.1163, "lr": 2.321187656802726e-08, "epoch": 9.876802096985584, "percentage": 98.77, "elapsed_time": "1:02:19", "remaining_time": "0:00:46", "throughput": 2701.93, "total_tokens": 10102632}
7556
+ {"current_steps": 37685, "total_steps": 38150, "loss": 0.1317, "lr": 2.272174550214945e-08, "epoch": 9.8781127129751, "percentage": 98.78, "elapsed_time": "1:02:19", "remaining_time": "0:00:46", "throughput": 2701.91, "total_tokens": 10103688}
7557
+ {"current_steps": 37690, "total_steps": 38150, "loss": 0.0558, "lr": 2.223684213745192e-08, "epoch": 9.879423328964613, "percentage": 98.79, "elapsed_time": "1:02:19", "remaining_time": "0:00:45", "throughput": 2701.92, "total_tokens": 10104904}
7558
+ {"current_steps": 37695, "total_steps": 38150, "loss": 0.1807, "lr": 2.1757166575425702e-08, "epoch": 9.880733944954128, "percentage": 98.81, "elapsed_time": "1:02:20", "remaining_time": "0:00:45", "throughput": 2701.97, "total_tokens": 10106328}
7559
+ {"current_steps": 37700, "total_steps": 38150, "loss": 0.1013, "lr": 2.1282718916465494e-08, "epoch": 9.882044560943644, "percentage": 98.82, "elapsed_time": "1:02:20", "remaining_time": "0:00:44", "throughput": 2701.98, "total_tokens": 10107496}
7560
+ {"current_steps": 37705, "total_steps": 38150, "loss": 0.1287, "lr": 2.0813499259872414e-08, "epoch": 9.883355176933158, "percentage": 98.83, "elapsed_time": "1:02:21", "remaining_time": "0:00:44", "throughput": 2701.96, "total_tokens": 10108632}
7561
+ {"current_steps": 37710, "total_steps": 38150, "loss": 0.1425, "lr": 2.0349507703851244e-08, "epoch": 9.884665792922673, "percentage": 98.85, "elapsed_time": "1:02:21", "remaining_time": "0:00:43", "throughput": 2702.02, "total_tokens": 10110008}
7562
+ {"current_steps": 37715, "total_steps": 38150, "loss": 0.1604, "lr": 1.9890744345518742e-08, "epoch": 9.885976408912189, "percentage": 98.86, "elapsed_time": "1:02:22", "remaining_time": "0:00:43", "throughput": 2702.04, "total_tokens": 10111272}
7563
+ {"current_steps": 37720, "total_steps": 38150, "loss": 0.1352, "lr": 1.9437209280889768e-08, "epoch": 9.887287024901704, "percentage": 98.87, "elapsed_time": "1:02:22", "remaining_time": "0:00:42", "throughput": 2702.14, "total_tokens": 10112920}
7564
+ {"current_steps": 37725, "total_steps": 38150, "loss": 0.2724, "lr": 1.8988902604891166e-08, "epoch": 9.888597640891218, "percentage": 98.89, "elapsed_time": "1:02:23", "remaining_time": "0:00:42", "throughput": 2702.26, "total_tokens": 10114760}
7565
+ {"current_steps": 37730, "total_steps": 38150, "loss": 0.267, "lr": 1.8545824411350665e-08, "epoch": 9.889908256880734, "percentage": 98.9, "elapsed_time": "1:02:23", "remaining_time": "0:00:41", "throughput": 2702.3, "total_tokens": 10116120}
7566
+ {"current_steps": 37735, "total_steps": 38150, "loss": 0.1483, "lr": 1.810797479300519e-08, "epoch": 9.89121887287025, "percentage": 98.91, "elapsed_time": "1:02:23", "remaining_time": "0:00:41", "throughput": 2702.4, "total_tokens": 10117784}
7567
+ {"current_steps": 37740, "total_steps": 38150, "loss": 0.1647, "lr": 1.7675353841495325e-08, "epoch": 9.892529488859765, "percentage": 98.93, "elapsed_time": "1:02:24", "remaining_time": "0:00:40", "throughput": 2702.43, "total_tokens": 10119112}
7568
+ {"current_steps": 37745, "total_steps": 38150, "loss": 0.2018, "lr": 1.7247961647368082e-08, "epoch": 9.893840104849279, "percentage": 98.94, "elapsed_time": "1:02:24", "remaining_time": "0:00:40", "throughput": 2702.46, "total_tokens": 10120424}
7569
+ {"current_steps": 37750, "total_steps": 38150, "loss": 0.1114, "lr": 1.6825798300074137e-08, "epoch": 9.895150720838794, "percentage": 98.95, "elapsed_time": "1:02:25", "remaining_time": "0:00:39", "throughput": 2702.45, "total_tokens": 10121592}
7570
+ {"current_steps": 37755, "total_steps": 38150, "loss": 0.07, "lr": 1.640886388797336e-08, "epoch": 9.89646133682831, "percentage": 98.96, "elapsed_time": "1:02:25", "remaining_time": "0:00:39", "throughput": 2702.42, "total_tokens": 10122600}
7571
+ {"current_steps": 37760, "total_steps": 38150, "loss": 0.1285, "lr": 1.5997158498329277e-08, "epoch": 9.897771952817825, "percentage": 98.98, "elapsed_time": "1:02:26", "remaining_time": "0:00:38", "throughput": 2702.44, "total_tokens": 10123880}
7572
+ {"current_steps": 37765, "total_steps": 38150, "loss": 0.0753, "lr": 1.559068221731186e-08, "epoch": 9.899082568807339, "percentage": 98.99, "elapsed_time": "1:02:26", "remaining_time": "0:00:38", "throughput": 2702.46, "total_tokens": 10125112}
7573
+ {"current_steps": 37770, "total_steps": 38150, "loss": 0.1168, "lr": 1.518943512999471e-08, "epoch": 9.900393184796854, "percentage": 99.0, "elapsed_time": "1:02:27", "remaining_time": "0:00:37", "throughput": 2702.45, "total_tokens": 10126280}
7574
+ {"current_steps": 37775, "total_steps": 38150, "loss": 0.1289, "lr": 1.4793417320357882e-08, "epoch": 9.90170380078637, "percentage": 99.02, "elapsed_time": "1:02:27", "remaining_time": "0:00:37", "throughput": 2702.55, "total_tokens": 10128024}
7575
+ {"current_steps": 37780, "total_steps": 38150, "loss": 0.1358, "lr": 1.4402628871285073e-08, "epoch": 9.903014416775886, "percentage": 99.03, "elapsed_time": "1:02:28", "remaining_time": "0:00:36", "throughput": 2702.61, "total_tokens": 10129528}
7576
+ {"current_steps": 37785, "total_steps": 38150, "loss": 0.1429, "lr": 1.401706986457474e-08, "epoch": 9.9043250327654, "percentage": 99.04, "elapsed_time": "1:02:28", "remaining_time": "0:00:36", "throughput": 2702.7, "total_tokens": 10131192}
7577
+ {"current_steps": 37790, "total_steps": 38150, "loss": 0.2331, "lr": 1.3636740380915113e-08, "epoch": 9.905635648754915, "percentage": 99.06, "elapsed_time": "1:02:28", "remaining_time": "0:00:35", "throughput": 2702.71, "total_tokens": 10132424}
7578
+ {"current_steps": 37795, "total_steps": 38150, "loss": 0.1162, "lr": 1.326164049991474e-08, "epoch": 9.90694626474443, "percentage": 99.07, "elapsed_time": "1:02:29", "remaining_time": "0:00:35", "throughput": 2702.76, "total_tokens": 10133816}
7579
+ {"current_steps": 37800, "total_steps": 38150, "loss": 0.194, "lr": 1.2891770300080263e-08, "epoch": 9.908256880733944, "percentage": 99.08, "elapsed_time": "1:02:29", "remaining_time": "0:00:34", "throughput": 2702.81, "total_tokens": 10135176}
7580
+ {"current_steps": 37805, "total_steps": 38150, "loss": 0.154, "lr": 1.2527129858821985e-08, "epoch": 9.90956749672346, "percentage": 99.1, "elapsed_time": "1:02:30", "remaining_time": "0:00:34", "throughput": 2702.83, "total_tokens": 10136376}
7581
+ {"current_steps": 37810, "total_steps": 38150, "loss": 0.2252, "lr": 1.2167719252462183e-08, "epoch": 9.910878112712975, "percentage": 99.11, "elapsed_time": "1:02:30", "remaining_time": "0:00:33", "throughput": 2702.98, "total_tokens": 10138408}
7582
+ {"current_steps": 37815, "total_steps": 38150, "loss": 0.1162, "lr": 1.1813538556221249e-08, "epoch": 9.91218872870249, "percentage": 99.12, "elapsed_time": "1:02:31", "remaining_time": "0:00:33", "throughput": 2703.01, "total_tokens": 10139672}
7583
+ {"current_steps": 37820, "total_steps": 38150, "loss": 0.2525, "lr": 1.1464587844231544e-08, "epoch": 9.913499344692005, "percentage": 99.13, "elapsed_time": "1:02:31", "remaining_time": "0:00:32", "throughput": 2703.11, "total_tokens": 10141624}
7584
+ {"current_steps": 37825, "total_steps": 38150, "loss": 0.1496, "lr": 1.1120867189526318e-08, "epoch": 9.91480996068152, "percentage": 99.15, "elapsed_time": "1:02:32", "remaining_time": "0:00:32", "throughput": 2703.12, "total_tokens": 10142920}
7585
+ {"current_steps": 37830, "total_steps": 38150, "loss": 0.1283, "lr": 1.0782376664045246e-08, "epoch": 9.916120576671036, "percentage": 99.16, "elapsed_time": "1:02:32", "remaining_time": "0:00:31", "throughput": 2703.23, "total_tokens": 10144664}
7586
+ {"current_steps": 37835, "total_steps": 38150, "loss": 0.0768, "lr": 1.044911633863721e-08, "epoch": 9.917431192660551, "percentage": 99.17, "elapsed_time": "1:02:33", "remaining_time": "0:00:31", "throughput": 2703.27, "total_tokens": 10146008}
7587
+ {"current_steps": 37840, "total_steps": 38150, "loss": 0.1012, "lr": 1.0121086283049198e-08, "epoch": 9.918741808650065, "percentage": 99.19, "elapsed_time": "1:02:33", "remaining_time": "0:00:30", "throughput": 2703.35, "total_tokens": 10147608}
7588
+ {"current_steps": 37845, "total_steps": 38150, "loss": 0.184, "lr": 9.798286565937399e-09, "epoch": 9.92005242463958, "percentage": 99.2, "elapsed_time": "1:02:34", "remaining_time": "0:00:30", "throughput": 2703.35, "total_tokens": 10148712}
7589
+ {"current_steps": 37850, "total_steps": 38150, "loss": 0.1798, "lr": 9.48071725486721e-09, "epoch": 9.921363040629096, "percentage": 99.21, "elapsed_time": "1:02:34", "remaining_time": "0:00:29", "throughput": 2703.39, "total_tokens": 10150040}
7590
+ {"current_steps": 37855, "total_steps": 38150, "loss": 0.1222, "lr": 9.168378416299362e-09, "epoch": 9.922673656618612, "percentage": 99.23, "elapsed_time": "1:02:35", "remaining_time": "0:00:29", "throughput": 2703.52, "total_tokens": 10152024}
7591
+ {"current_steps": 37860, "total_steps": 38150, "loss": 0.2142, "lr": 8.861270115612108e-09, "epoch": 9.923984272608125, "percentage": 99.24, "elapsed_time": "1:02:35", "remaining_time": "0:00:28", "throughput": 2703.59, "total_tokens": 10153480}
7592
+ {"current_steps": 37865, "total_steps": 38150, "loss": 0.1423, "lr": 8.559392417079038e-09, "epoch": 9.92529488859764, "percentage": 99.25, "elapsed_time": "1:02:36", "remaining_time": "0:00:28", "throughput": 2703.64, "total_tokens": 10154936}
7593
+ {"current_steps": 37870, "total_steps": 38150, "loss": 0.0476, "lr": 8.262745383880166e-09, "epoch": 9.926605504587156, "percentage": 99.27, "elapsed_time": "1:02:36", "remaining_time": "0:00:27", "throughput": 2703.61, "total_tokens": 10155960}
7594
+ {"current_steps": 37875, "total_steps": 38150, "loss": 0.2281, "lr": 7.971329078110268e-09, "epoch": 9.927916120576672, "percentage": 99.28, "elapsed_time": "1:02:36", "remaining_time": "0:00:27", "throughput": 2703.63, "total_tokens": 10157160}
7595
+ {"current_steps": 37880, "total_steps": 38150, "loss": 0.1047, "lr": 7.68514356075667e-09, "epoch": 9.929226736566186, "percentage": 99.29, "elapsed_time": "1:02:37", "remaining_time": "0:00:26", "throughput": 2703.64, "total_tokens": 10158472}
7596
+ {"current_steps": 37885, "total_steps": 38150, "loss": 0.2136, "lr": 7.4041888917186775e-09, "epoch": 9.930537352555701, "percentage": 99.31, "elapsed_time": "1:02:37", "remaining_time": "0:00:26", "throughput": 2703.67, "total_tokens": 10159832}
7597
+ {"current_steps": 37890, "total_steps": 38150, "loss": 0.0821, "lr": 7.1284651297992556e-09, "epoch": 9.931847968545217, "percentage": 99.32, "elapsed_time": "1:02:38", "remaining_time": "0:00:25", "throughput": 2703.7, "total_tokens": 10161096}
7598
+ {"current_steps": 37895, "total_steps": 38150, "loss": 0.0965, "lr": 6.8579723327105715e-09, "epoch": 9.93315858453473, "percentage": 99.33, "elapsed_time": "1:02:38", "remaining_time": "0:00:25", "throughput": 2703.75, "total_tokens": 10162616}
7599
+ {"current_steps": 37900, "total_steps": 38150, "loss": 0.1408, "lr": 6.5927105570601224e-09, "epoch": 9.934469200524246, "percentage": 99.34, "elapsed_time": "1:02:39", "remaining_time": "0:00:24", "throughput": 2703.77, "total_tokens": 10163912}
7600
+ {"current_steps": 37905, "total_steps": 38150, "loss": 0.1257, "lr": 6.3326798583729364e-09, "epoch": 9.935779816513762, "percentage": 99.36, "elapsed_time": "1:02:39", "remaining_time": "0:00:24", "throughput": 2703.8, "total_tokens": 10165224}
7601
+ {"current_steps": 37910, "total_steps": 38150, "loss": 0.1584, "lr": 6.077880291069371e-09, "epoch": 9.937090432503277, "percentage": 99.37, "elapsed_time": "1:02:40", "remaining_time": "0:00:23", "throughput": 2703.88, "total_tokens": 10166904}
7602
+ {"current_steps": 37915, "total_steps": 38150, "loss": 0.1462, "lr": 5.8283119084789895e-09, "epoch": 9.938401048492791, "percentage": 99.38, "elapsed_time": "1:02:40", "remaining_time": "0:00:23", "throughput": 2703.93, "total_tokens": 10168280}
7603
+ {"current_steps": 37920, "total_steps": 38150, "loss": 0.1491, "lr": 5.583974762835009e-09, "epoch": 9.939711664482306, "percentage": 99.4, "elapsed_time": "1:02:40", "remaining_time": "0:00:22", "throughput": 2703.98, "total_tokens": 10169624}
7604
+ {"current_steps": 37925, "total_steps": 38150, "loss": 0.0979, "lr": 5.344868905279854e-09, "epoch": 9.941022280471822, "percentage": 99.41, "elapsed_time": "1:02:41", "remaining_time": "0:00:22", "throughput": 2703.98, "total_tokens": 10170840}
7605
+ {"current_steps": 37930, "total_steps": 38150, "loss": 0.092, "lr": 5.110994385856826e-09, "epoch": 9.942332896461338, "percentage": 99.42, "elapsed_time": "1:02:41", "remaining_time": "0:00:21", "throughput": 2703.98, "total_tokens": 10172008}
7606
+ {"current_steps": 37935, "total_steps": 38150, "loss": 0.0853, "lr": 4.8823512535128845e-09, "epoch": 9.943643512450851, "percentage": 99.44, "elapsed_time": "1:02:42", "remaining_time": "0:00:21", "throughput": 2704.2, "total_tokens": 10174936}
7607
+ {"current_steps": 37940, "total_steps": 38150, "loss": 0.2499, "lr": 4.658939556104191e-09, "epoch": 9.944954128440367, "percentage": 99.45, "elapsed_time": "1:02:43", "remaining_time": "0:00:20", "throughput": 2704.23, "total_tokens": 10176232}
7608
+ {"current_steps": 37945, "total_steps": 38150, "loss": 0.1635, "lr": 4.440759340393341e-09, "epoch": 9.946264744429882, "percentage": 99.46, "elapsed_time": "1:02:43", "remaining_time": "0:00:20", "throughput": 2704.24, "total_tokens": 10177416}
7609
+ {"current_steps": 37950, "total_steps": 38150, "loss": 0.0945, "lr": 4.227810652041031e-09, "epoch": 9.947575360419398, "percentage": 99.48, "elapsed_time": "1:02:43", "remaining_time": "0:00:19", "throughput": 2704.29, "total_tokens": 10178792}
7610
+ {"current_steps": 37955, "total_steps": 38150, "loss": 0.0634, "lr": 4.0200935356171645e-09, "epoch": 9.948885976408912, "percentage": 99.49, "elapsed_time": "1:02:44", "remaining_time": "0:00:19", "throughput": 2704.3, "total_tokens": 10179912}
7611
+ {"current_steps": 37960, "total_steps": 38150, "loss": 0.0964, "lr": 3.817608034600851e-09, "epoch": 9.950196592398427, "percentage": 99.5, "elapsed_time": "1:02:44", "remaining_time": "0:00:18", "throughput": 2704.32, "total_tokens": 10181160}
7612
+ {"current_steps": 37965, "total_steps": 38150, "loss": 0.2594, "lr": 3.620354191366526e-09, "epoch": 9.951507208387943, "percentage": 99.52, "elapsed_time": "1:02:45", "remaining_time": "0:00:18", "throughput": 2704.33, "total_tokens": 10182376}
7613
+ {"current_steps": 37970, "total_steps": 38150, "loss": 0.2145, "lr": 3.4283320472033864e-09, "epoch": 9.952817824377458, "percentage": 99.53, "elapsed_time": "1:02:45", "remaining_time": "0:00:17", "throughput": 2704.38, "total_tokens": 10183816}
7614
+ {"current_steps": 37975, "total_steps": 38150, "loss": 0.2383, "lr": 3.241541642298729e-09, "epoch": 9.954128440366972, "percentage": 99.54, "elapsed_time": "1:02:46", "remaining_time": "0:00:17", "throughput": 2704.39, "total_tokens": 10185016}
7615
+ {"current_steps": 37980, "total_steps": 38150, "loss": 0.124, "lr": 3.059983015749057e-09, "epoch": 9.955439056356488, "percentage": 99.55, "elapsed_time": "1:02:46", "remaining_time": "0:00:16", "throughput": 2704.4, "total_tokens": 10186248}
7616
+ {"current_steps": 37985, "total_steps": 38150, "loss": 0.0986, "lr": 2.88365620555453e-09, "epoch": 9.956749672346003, "percentage": 99.57, "elapsed_time": "1:02:47", "remaining_time": "0:00:16", "throughput": 2704.47, "total_tokens": 10187944}
7617
+ {"current_steps": 37990, "total_steps": 38150, "loss": 0.1467, "lr": 2.712561248618961e-09, "epoch": 9.958060288335517, "percentage": 99.58, "elapsed_time": "1:02:47", "remaining_time": "0:00:15", "throughput": 2704.47, "total_tokens": 10189064}
7618
+ {"current_steps": 37995, "total_steps": 38150, "loss": 0.1451, "lr": 2.546698180749818e-09, "epoch": 9.959370904325032, "percentage": 99.59, "elapsed_time": "1:02:47", "remaining_time": "0:00:15", "throughput": 2704.54, "total_tokens": 10190616}
7619
+ {"current_steps": 38000, "total_steps": 38150, "loss": 0.1541, "lr": 2.3860670366665505e-09, "epoch": 9.960681520314548, "percentage": 99.61, "elapsed_time": "1:02:48", "remaining_time": "0:00:14", "throughput": 2704.62, "total_tokens": 10192200}
7620
+ {"current_steps": 38005, "total_steps": 38150, "loss": 0.1491, "lr": 2.2306678499867116e-09, "epoch": 9.961992136304064, "percentage": 99.62, "elapsed_time": "1:02:48", "remaining_time": "0:00:14", "throughput": 2704.73, "total_tokens": 10193928}
7621
+ {"current_steps": 38010, "total_steps": 38150, "loss": 0.1615, "lr": 2.080500653234285e-09, "epoch": 9.963302752293577, "percentage": 99.63, "elapsed_time": "1:02:49", "remaining_time": "0:00:13", "throughput": 2704.77, "total_tokens": 10195240}
7622
+ {"current_steps": 38015, "total_steps": 38150, "loss": 0.1564, "lr": 1.935565477839685e-09, "epoch": 9.964613368283093, "percentage": 99.65, "elapsed_time": "1:02:49", "remaining_time": "0:00:13", "throughput": 2704.89, "total_tokens": 10197112}
7623
+ {"current_steps": 38020, "total_steps": 38150, "loss": 0.1546, "lr": 1.7958623541397546e-09, "epoch": 9.965923984272608, "percentage": 99.66, "elapsed_time": "1:02:50", "remaining_time": "0:00:12", "throughput": 2704.92, "total_tokens": 10198408}
7624
+ {"current_steps": 38025, "total_steps": 38150, "loss": 0.0943, "lr": 1.6613913113694424e-09, "epoch": 9.967234600262124, "percentage": 99.67, "elapsed_time": "1:02:50", "remaining_time": "0:00:12", "throughput": 2704.96, "total_tokens": 10199784}
7625
+ {"current_steps": 38030, "total_steps": 38150, "loss": 0.0758, "lr": 1.532152377678453e-09, "epoch": 9.968545216251638, "percentage": 99.69, "elapsed_time": "1:02:51", "remaining_time": "0:00:11", "throughput": 2704.94, "total_tokens": 10200824}
7626
+ {"current_steps": 38035, "total_steps": 38150, "loss": 0.1086, "lr": 1.4081455801145948e-09, "epoch": 9.969855832241153, "percentage": 99.7, "elapsed_time": "1:02:51", "remaining_time": "0:00:11", "throughput": 2704.93, "total_tokens": 10201912}
7627
+ {"current_steps": 38040, "total_steps": 38150, "loss": 0.0755, "lr": 1.289370944629331e-09, "epoch": 9.971166448230669, "percentage": 99.71, "elapsed_time": "1:02:52", "remaining_time": "0:00:10", "throughput": 2704.89, "total_tokens": 10202856}
7628
+ {"current_steps": 38045, "total_steps": 38150, "loss": 0.1101, "lr": 1.1758284960861066e-09, "epoch": 9.972477064220184, "percentage": 99.72, "elapsed_time": "1:02:52", "remaining_time": "0:00:10", "throughput": 2704.87, "total_tokens": 10203896}
7629
+ {"current_steps": 38050, "total_steps": 38150, "loss": 0.1004, "lr": 1.0675182582464693e-09, "epoch": 9.973787680209698, "percentage": 99.74, "elapsed_time": "1:02:52", "remaining_time": "0:00:09", "throughput": 2704.92, "total_tokens": 10205272}
7630
+ {"current_steps": 38055, "total_steps": 38150, "loss": 0.1659, "lr": 9.644402537811736e-10, "epoch": 9.975098296199214, "percentage": 99.75, "elapsed_time": "1:02:53", "remaining_time": "0:00:09", "throughput": 2705.02, "total_tokens": 10206936}
7631
+ {"current_steps": 38060, "total_steps": 38150, "loss": 0.0795, "lr": 8.665945042618529e-10, "epoch": 9.97640891218873, "percentage": 99.76, "elapsed_time": "1:02:53", "remaining_time": "0:00:08", "throughput": 2704.98, "total_tokens": 10207912}
7632
+ {"current_steps": 38065, "total_steps": 38150, "loss": 0.1725, "lr": 7.739810301693462e-10, "epoch": 9.977719528178245, "percentage": 99.78, "elapsed_time": "1:02:54", "remaining_time": "0:00:08", "throughput": 2704.95, "total_tokens": 10208904}
7633
+ {"current_steps": 38070, "total_steps": 38150, "loss": 0.2182, "lr": 6.865998508881477e-10, "epoch": 9.979030144167758, "percentage": 99.79, "elapsed_time": "1:02:54", "remaining_time": "0:00:07", "throughput": 2704.92, "total_tokens": 10209928}
7634
+ {"current_steps": 38075, "total_steps": 38150, "loss": 0.0792, "lr": 6.044509847064062e-10, "epoch": 9.980340760157274, "percentage": 99.8, "elapsed_time": "1:02:55", "remaining_time": "0:00:07", "throughput": 2704.98, "total_tokens": 10211368}
7635
+ {"current_steps": 38080, "total_steps": 38150, "loss": 0.1433, "lr": 5.275344488187006e-10, "epoch": 9.98165137614679, "percentage": 99.82, "elapsed_time": "1:02:55", "remaining_time": "0:00:06", "throughput": 2705.0, "total_tokens": 10212600}
7636
+ {"current_steps": 38085, "total_steps": 38150, "loss": 0.1122, "lr": 4.5585025932048943e-10, "epoch": 9.982961992136303, "percentage": 99.83, "elapsed_time": "1:02:55", "remaining_time": "0:00:06", "throughput": 2704.98, "total_tokens": 10213656}
7637
+ {"current_steps": 38090, "total_steps": 38150, "loss": 0.2141, "lr": 3.893984312164367e-10, "epoch": 9.984272608125819, "percentage": 99.84, "elapsed_time": "1:02:56", "remaining_time": "0:00:05", "throughput": 2705.06, "total_tokens": 10215272}
7638
+ {"current_steps": 38095, "total_steps": 38150, "loss": 0.0881, "lr": 3.28178978417637e-10, "epoch": 9.985583224115334, "percentage": 99.86, "elapsed_time": "1:02:56", "remaining_time": "0:00:05", "throughput": 2705.12, "total_tokens": 10216776}
7639
+ {"current_steps": 38100, "total_steps": 38150, "loss": 0.0894, "lr": 2.7219191373328846e-10, "epoch": 9.98689384010485, "percentage": 99.87, "elapsed_time": "1:02:57", "remaining_time": "0:00:04", "throughput": 2705.12, "total_tokens": 10217960}
7640
+ {"current_steps": 38105, "total_steps": 38150, "loss": 0.0501, "lr": 2.2143724888179506e-10, "epoch": 9.988204456094364, "percentage": 99.88, "elapsed_time": "1:02:57", "remaining_time": "0:00:04", "throughput": 2705.14, "total_tokens": 10219192}
7641
+ {"current_steps": 38110, "total_steps": 38150, "loss": 0.2575, "lr": 1.759149944879912e-10, "epoch": 9.98951507208388, "percentage": 99.9, "elapsed_time": "1:02:58", "remaining_time": "0:00:03", "throughput": 2705.24, "total_tokens": 10220904}
7642
+ {"current_steps": 38115, "total_steps": 38150, "loss": 0.2622, "lr": 1.3562516008036597e-10, "epoch": 9.990825688073395, "percentage": 99.91, "elapsed_time": "1:02:58", "remaining_time": "0:00:03", "throughput": 2705.26, "total_tokens": 10222152}
7643
+ {"current_steps": 38120, "total_steps": 38150, "loss": 0.1456, "lr": 1.0056775408828767e-10, "epoch": 9.99213630406291, "percentage": 99.92, "elapsed_time": "1:02:59", "remaining_time": "0:00:02", "throughput": 2705.32, "total_tokens": 10223672}
7644
+ {"current_steps": 38125, "total_steps": 38150, "loss": 0.2053, "lr": 7.07427838503305e-11, "epoch": 9.993446920052424, "percentage": 99.93, "elapsed_time": "1:02:59", "remaining_time": "0:00:02", "throughput": 2705.48, "total_tokens": 10225736}
7645
+ {"current_steps": 38130, "total_steps": 38150, "loss": 0.1426, "lr": 4.6150255611498996e-11, "epoch": 9.99475753604194, "percentage": 99.95, "elapsed_time": "1:03:00", "remaining_time": "0:00:01", "throughput": 2705.57, "total_tokens": 10227320}
7646
+ {"current_steps": 38135, "total_steps": 38150, "loss": 0.1232, "lr": 2.679017451490129e-11, "epoch": 9.996068152031455, "percentage": 99.96, "elapsed_time": "1:03:00", "remaining_time": "0:00:01", "throughput": 2705.58, "total_tokens": 10228504}
7647
+ {"current_steps": 38140, "total_steps": 38150, "loss": 0.2702, "lr": 1.2662544615626993e-11, "epoch": 9.997378768020969, "percentage": 99.97, "elapsed_time": "1:03:00", "remaining_time": "0:00:00", "throughput": 2705.58, "total_tokens": 10229624}
7648
+ {"current_steps": 38145, "total_steps": 38150, "loss": 0.2777, "lr": 3.767368869644905e-12, "epoch": 9.998689384010484, "percentage": 99.99, "elapsed_time": "1:03:01", "remaining_time": "0:00:00", "throughput": 2705.63, "total_tokens": 10231048}
7649
+ {"current_steps": 38150, "total_steps": 38150, "loss": 0.256, "lr": 1.0464913657859399e-13, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:03:01", "remaining_time": "0:00:00", "throughput": 2705.58, "total_tokens": 10232192}
7650
+ {"current_steps": 38150, "total_steps": 38150, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:03:03", "remaining_time": "0:00:00", "throughput": 2704.57, "total_tokens": 10232192}