Training in progress, step 38150
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +366 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2685028304
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7564816a9ba964ddfa89f1d5a9f5f2ad346d3be3603eed4d77c8aadcd0362608
|
| 3 |
size 2685028304
|
trainer_log.jsonl
CHANGED
|
@@ -7282,3 +7282,369 @@
|
|
| 7282 |
{"current_steps": 36315, "total_steps": 38150, "loss": 0.3595, "lr": 3.519325364867265e-07, "epoch": 9.51900393184797, "percentage": 95.19, "elapsed_time": "1:34:50", "remaining_time": "0:04:47", "throughput": 1709.2, "total_tokens": 9726928}
|
| 7283 |
{"current_steps": 36320, "total_steps": 38150, "loss": 0.3025, "lr": 3.5002278211650476e-07, "epoch": 9.520314547837483, "percentage": 95.2, "elapsed_time": "1:34:51", "remaining_time": "0:04:46", "throughput": 1709.26, "total_tokens": 9728528}
|
| 7284 |
{"current_steps": 36325, "total_steps": 38150, "loss": 0.3353, "lr": 3.4811818694392706e-07, "epoch": 9.521625163826998, "percentage": 95.22, "elapsed_time": "1:34:52", "remaining_time": "0:04:45", "throughput": 1709.28, "total_tokens": 9729808}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7282 |
{"current_steps": 36315, "total_steps": 38150, "loss": 0.3595, "lr": 3.519325364867265e-07, "epoch": 9.51900393184797, "percentage": 95.19, "elapsed_time": "1:34:50", "remaining_time": "0:04:47", "throughput": 1709.2, "total_tokens": 9726928}
|
| 7283 |
{"current_steps": 36320, "total_steps": 38150, "loss": 0.3025, "lr": 3.5002278211650476e-07, "epoch": 9.520314547837483, "percentage": 95.2, "elapsed_time": "1:34:51", "remaining_time": "0:04:46", "throughput": 1709.26, "total_tokens": 9728528}
|
| 7284 |
{"current_steps": 36325, "total_steps": 38150, "loss": 0.3353, "lr": 3.4811818694392706e-07, "epoch": 9.521625163826998, "percentage": 95.22, "elapsed_time": "1:34:52", "remaining_time": "0:04:45", "throughput": 1709.28, "total_tokens": 9729808}
|
| 7285 |
+
{"current_steps": 36330, "total_steps": 38150, "loss": 0.3755, "lr": 3.462187513676135e-07, "epoch": 9.522935779816514, "percentage": 95.23, "elapsed_time": "1:34:52", "remaining_time": "0:04:45", "throughput": 1709.26, "total_tokens": 9730800}
|
| 7286 |
+
{"current_steps": 36335, "total_steps": 38150, "loss": 0.327, "lr": 3.4432447578511826e-07, "epoch": 9.52424639580603, "percentage": 95.24, "elapsed_time": "1:34:53", "remaining_time": "0:04:44", "throughput": 1709.3, "total_tokens": 9732144}
|
| 7287 |
+
{"current_steps": 36340, "total_steps": 38150, "loss": 0.5464, "lr": 3.4243536059291035e-07, "epoch": 9.525557011795543, "percentage": 95.26, "elapsed_time": "1:34:54", "remaining_time": "0:04:43", "throughput": 1709.33, "total_tokens": 9733424}
|
| 7288 |
+
{"current_steps": 36345, "total_steps": 38150, "loss": 0.4453, "lr": 3.405514061863735e-07, "epoch": 9.526867627785059, "percentage": 95.27, "elapsed_time": "1:34:54", "remaining_time": "0:04:42", "throughput": 1709.38, "total_tokens": 9734800}
|
| 7289 |
+
{"current_steps": 36350, "total_steps": 38150, "loss": 0.5759, "lr": 3.3867261295982e-07, "epoch": 9.528178243774574, "percentage": 95.28, "elapsed_time": "1:34:55", "remaining_time": "0:04:42", "throughput": 1709.43, "total_tokens": 9736272}
|
| 7290 |
+
{"current_steps": 36355, "total_steps": 38150, "loss": 0.4823, "lr": 3.36798981306477e-07, "epoch": 9.52948885976409, "percentage": 95.29, "elapsed_time": "1:34:56", "remaining_time": "0:04:41", "throughput": 1709.47, "total_tokens": 9737664}
|
| 7291 |
+
{"current_steps": 36360, "total_steps": 38150, "loss": 0.6085, "lr": 3.3493051161849477e-07, "epoch": 9.530799475753604, "percentage": 95.31, "elapsed_time": "1:34:56", "remaining_time": "0:04:40", "throughput": 1709.5, "total_tokens": 9738896}
|
| 7292 |
+
{"current_steps": 36365, "total_steps": 38150, "loss": 0.3231, "lr": 3.330672042869354e-07, "epoch": 9.53211009174312, "percentage": 95.32, "elapsed_time": "1:34:57", "remaining_time": "0:04:39", "throughput": 1709.55, "total_tokens": 9740288}
|
| 7293 |
+
{"current_steps": 36370, "total_steps": 38150, "loss": 0.4255, "lr": 3.312090597017925e-07, "epoch": 9.533420707732635, "percentage": 95.33, "elapsed_time": "1:34:58", "remaining_time": "0:04:38", "throughput": 1709.57, "total_tokens": 9741424}
|
| 7294 |
+
{"current_steps": 36375, "total_steps": 38150, "loss": 0.3459, "lr": 3.2935607825196614e-07, "epoch": 9.534731323722148, "percentage": 95.35, "elapsed_time": "1:34:58", "remaining_time": "0:04:38", "throughput": 1709.59, "total_tokens": 9742656}
|
| 7295 |
+
{"current_steps": 36380, "total_steps": 38150, "loss": 0.4675, "lr": 3.275082603252877e-07, "epoch": 9.536041939711664, "percentage": 95.36, "elapsed_time": "1:34:59", "remaining_time": "0:04:37", "throughput": 1709.62, "total_tokens": 9743952}
|
| 7296 |
+
{"current_steps": 36385, "total_steps": 38150, "loss": 0.4096, "lr": 3.2566560630849784e-07, "epoch": 9.53735255570118, "percentage": 95.37, "elapsed_time": "1:35:00", "remaining_time": "0:04:36", "throughput": 1709.66, "total_tokens": 9745344}
|
| 7297 |
+
{"current_steps": 36390, "total_steps": 38150, "loss": 0.361, "lr": 3.2382811658726033e-07, "epoch": 9.538663171690695, "percentage": 95.39, "elapsed_time": "1:35:00", "remaining_time": "0:04:35", "throughput": 1709.74, "total_tokens": 9747088}
|
| 7298 |
+
{"current_steps": 36395, "total_steps": 38150, "loss": 0.4035, "lr": 3.219957915461619e-07, "epoch": 9.539973787680209, "percentage": 95.4, "elapsed_time": "1:35:01", "remaining_time": "0:04:34", "throughput": 1709.77, "total_tokens": 9748416}
|
| 7299 |
+
{"current_steps": 36400, "total_steps": 38150, "loss": 0.5251, "lr": 3.201686315687041e-07, "epoch": 9.541284403669724, "percentage": 95.41, "elapsed_time": "1:35:02", "remaining_time": "0:04:34", "throughput": 1709.78, "total_tokens": 9749552}
|
| 7300 |
+
{"current_steps": 36405, "total_steps": 38150, "loss": 0.2695, "lr": 3.183466370373117e-07, "epoch": 9.54259501965924, "percentage": 95.43, "elapsed_time": "1:35:02", "remaining_time": "0:04:33", "throughput": 1709.79, "total_tokens": 9750624}
|
| 7301 |
+
{"current_steps": 36410, "total_steps": 38150, "loss": 0.387, "lr": 3.1652980833331846e-07, "epoch": 9.543905635648755, "percentage": 95.44, "elapsed_time": "1:35:03", "remaining_time": "0:04:32", "throughput": 1709.85, "total_tokens": 9752272}
|
| 7302 |
+
{"current_steps": 36415, "total_steps": 38150, "loss": 0.5501, "lr": 3.147181458369897e-07, "epoch": 9.54521625163827, "percentage": 95.45, "elapsed_time": "1:35:04", "remaining_time": "0:04:31", "throughput": 1709.87, "total_tokens": 9753552}
|
| 7303 |
+
{"current_steps": 36420, "total_steps": 38150, "loss": 0.638, "lr": 3.12911649927497e-07, "epoch": 9.546526867627785, "percentage": 95.47, "elapsed_time": "1:35:04", "remaining_time": "0:04:30", "throughput": 1709.93, "total_tokens": 9755072}
|
| 7304 |
+
{"current_steps": 36425, "total_steps": 38150, "loss": 0.4726, "lr": 3.111103209829408e-07, "epoch": 9.5478374836173, "percentage": 95.48, "elapsed_time": "1:35:05", "remaining_time": "0:04:30", "throughput": 1709.96, "total_tokens": 9756448}
|
| 7305 |
+
{"current_steps": 36430, "total_steps": 38150, "loss": 0.4905, "lr": 3.0931415938033336e-07, "epoch": 9.549148099606816, "percentage": 95.49, "elapsed_time": "1:35:06", "remaining_time": "0:04:29", "throughput": 1710.02, "total_tokens": 9757920}
|
| 7306 |
+
{"current_steps": 36435, "total_steps": 38150, "loss": 0.4567, "lr": 3.075231654956129e-07, "epoch": 9.55045871559633, "percentage": 95.5, "elapsed_time": "1:35:06", "remaining_time": "0:04:28", "throughput": 1710.01, "total_tokens": 9759008}
|
| 7307 |
+
{"current_steps": 36440, "total_steps": 38150, "loss": 0.3707, "lr": 3.0573733970362674e-07, "epoch": 9.551769331585845, "percentage": 95.52, "elapsed_time": "1:35:07", "remaining_time": "0:04:27", "throughput": 1709.99, "total_tokens": 9759904}
|
| 7308 |
+
{"current_steps": 36445, "total_steps": 38150, "loss": 0.4859, "lr": 3.0395668237814813e-07, "epoch": 9.55307994757536, "percentage": 95.53, "elapsed_time": "1:35:08", "remaining_time": "0:04:27", "throughput": 1710.06, "total_tokens": 9761520}
|
| 7309 |
+
{"current_steps": 36450, "total_steps": 38150, "loss": 0.5199, "lr": 3.0218119389186503e-07, "epoch": 9.554390563564876, "percentage": 95.54, "elapsed_time": "1:35:08", "remaining_time": "0:04:26", "throughput": 1710.14, "total_tokens": 9763184}
|
| 7310 |
+
{"current_steps": 36455, "total_steps": 38150, "loss": 0.4943, "lr": 3.0041087461638304e-07, "epoch": 9.55570117955439, "percentage": 95.56, "elapsed_time": "1:35:09", "remaining_time": "0:04:25", "throughput": 1710.13, "total_tokens": 9764224}
|
| 7311 |
+
{"current_steps": 36460, "total_steps": 38150, "loss": 0.5861, "lr": 2.986457249222252e-07, "epoch": 9.557011795543906, "percentage": 95.57, "elapsed_time": "1:35:10", "remaining_time": "0:04:24", "throughput": 1710.19, "total_tokens": 9765696}
|
| 7312 |
+
{"current_steps": 36465, "total_steps": 38150, "loss": 0.459, "lr": 2.968857451788404e-07, "epoch": 9.558322411533421, "percentage": 95.58, "elapsed_time": "1:35:10", "remaining_time": "0:04:23", "throughput": 1710.24, "total_tokens": 9767072}
|
| 7313 |
+
{"current_steps": 36470, "total_steps": 38150, "loss": 0.3379, "lr": 2.951309357545812e-07, "epoch": 9.559633027522935, "percentage": 95.6, "elapsed_time": "1:35:11", "remaining_time": "0:04:23", "throughput": 1710.28, "total_tokens": 9768400}
|
| 7314 |
+
{"current_steps": 36475, "total_steps": 38150, "loss": 0.268, "lr": 2.9338129701673434e-07, "epoch": 9.56094364351245, "percentage": 95.61, "elapsed_time": "1:35:12", "remaining_time": "0:04:22", "throughput": 1710.32, "total_tokens": 9769696}
|
| 7315 |
+
{"current_steps": 36480, "total_steps": 38150, "loss": 0.4903, "lr": 2.916368293314903e-07, "epoch": 9.562254259501966, "percentage": 95.62, "elapsed_time": "1:35:12", "remaining_time": "0:04:21", "throughput": 1710.35, "total_tokens": 9771040}
|
| 7316 |
+
{"current_steps": 36485, "total_steps": 38150, "loss": 0.3791, "lr": 2.898975330639653e-07, "epoch": 9.563564875491481, "percentage": 95.64, "elapsed_time": "1:35:13", "remaining_time": "0:04:20", "throughput": 1710.39, "total_tokens": 9772352}
|
| 7317 |
+
{"current_steps": 36490, "total_steps": 38150, "loss": 0.5536, "lr": 2.8816340857819034e-07, "epoch": 9.564875491480995, "percentage": 95.65, "elapsed_time": "1:35:14", "remaining_time": "0:04:19", "throughput": 1710.42, "total_tokens": 9773712}
|
| 7318 |
+
{"current_steps": 36495, "total_steps": 38150, "loss": 0.4325, "lr": 2.8643445623711684e-07, "epoch": 9.56618610747051, "percentage": 95.66, "elapsed_time": "1:35:14", "remaining_time": "0:04:19", "throughput": 1710.44, "total_tokens": 9774896}
|
| 7319 |
+
{"current_steps": 36500, "total_steps": 38150, "loss": 0.4535, "lr": 2.847106764026081e-07, "epoch": 9.567496723460026, "percentage": 95.67, "elapsed_time": "1:35:15", "remaining_time": "0:04:18", "throughput": 1710.53, "total_tokens": 9776672}
|
| 7320 |
+
{"current_steps": 36505, "total_steps": 38150, "loss": 0.4658, "lr": 2.8299206943545044e-07, "epoch": 9.568807339449542, "percentage": 95.69, "elapsed_time": "1:35:16", "remaining_time": "0:04:17", "throughput": 1710.51, "total_tokens": 9777616}
|
| 7321 |
+
{"current_steps": 36510, "total_steps": 38150, "loss": 0.4222, "lr": 2.812786356953451e-07, "epoch": 9.570117955439056, "percentage": 95.7, "elapsed_time": "1:35:16", "remaining_time": "0:04:16", "throughput": 1710.52, "total_tokens": 9778720}
|
| 7322 |
+
{"current_steps": 36515, "total_steps": 38150, "loss": 0.4351, "lr": 2.795703755409107e-07, "epoch": 9.571428571428571, "percentage": 95.71, "elapsed_time": "1:35:17", "remaining_time": "0:04:16", "throughput": 1710.59, "total_tokens": 9780304}
|
| 7323 |
+
{"current_steps": 36520, "total_steps": 38150, "loss": 0.5777, "lr": 2.7786728932968354e-07, "epoch": 9.572739187418087, "percentage": 95.73, "elapsed_time": "1:35:18", "remaining_time": "0:04:15", "throughput": 1710.64, "total_tokens": 9781856}
|
| 7324 |
+
{"current_steps": 36525, "total_steps": 38150, "loss": 0.3893, "lr": 2.761693774181173e-07, "epoch": 9.574049803407602, "percentage": 95.74, "elapsed_time": "1:35:18", "remaining_time": "0:04:14", "throughput": 1710.69, "total_tokens": 9783312}
|
| 7325 |
+
{"current_steps": 36530, "total_steps": 38150, "loss": 0.3799, "lr": 2.7447664016157783e-07, "epoch": 9.575360419397116, "percentage": 95.75, "elapsed_time": "1:35:19", "remaining_time": "0:04:13", "throughput": 1710.75, "total_tokens": 9784832}
|
| 7326 |
+
{"current_steps": 36535, "total_steps": 38150, "loss": 0.3913, "lr": 2.727890779143566e-07, "epoch": 9.576671035386632, "percentage": 95.77, "elapsed_time": "1:35:20", "remaining_time": "0:04:12", "throughput": 1710.76, "total_tokens": 9786016}
|
| 7327 |
+
{"current_steps": 36540, "total_steps": 38150, "loss": 0.4441, "lr": 2.7110669102965723e-07, "epoch": 9.577981651376147, "percentage": 95.78, "elapsed_time": "1:35:20", "remaining_time": "0:04:12", "throughput": 1710.8, "total_tokens": 9787312}
|
| 7328 |
+
{"current_steps": 36545, "total_steps": 38150, "loss": 0.5725, "lr": 2.694294798595981e-07, "epoch": 9.579292267365663, "percentage": 95.79, "elapsed_time": "1:35:21", "remaining_time": "0:04:11", "throughput": 1710.79, "total_tokens": 9788368}
|
| 7329 |
+
{"current_steps": 36550, "total_steps": 38150, "loss": 0.6227, "lr": 2.677574447552178e-07, "epoch": 9.580602883355176, "percentage": 95.81, "elapsed_time": "1:35:22", "remaining_time": "0:04:10", "throughput": 1710.81, "total_tokens": 9789632}
|
| 7330 |
+
{"current_steps": 36555, "total_steps": 38150, "loss": 0.497, "lr": 2.660905860664697e-07, "epoch": 9.581913499344692, "percentage": 95.82, "elapsed_time": "1:35:22", "remaining_time": "0:04:09", "throughput": 1710.86, "total_tokens": 9791040}
|
| 7331 |
+
{"current_steps": 36560, "total_steps": 38150, "loss": 0.2953, "lr": 2.6442890414222487e-07, "epoch": 9.583224115334207, "percentage": 95.83, "elapsed_time": "1:35:23", "remaining_time": "0:04:08", "throughput": 1710.88, "total_tokens": 9792320}
|
| 7332 |
+
{"current_steps": 36565, "total_steps": 38150, "loss": 0.524, "lr": 2.6277239933027163e-07, "epoch": 9.584534731323721, "percentage": 95.85, "elapsed_time": "1:35:24", "remaining_time": "0:04:08", "throughput": 1710.89, "total_tokens": 9793552}
|
| 7333 |
+
{"current_steps": 36570, "total_steps": 38150, "loss": 0.3568, "lr": 2.611210719773133e-07, "epoch": 9.585845347313237, "percentage": 95.86, "elapsed_time": "1:35:24", "remaining_time": "0:04:07", "throughput": 1710.95, "total_tokens": 9795136}
|
| 7334 |
+
{"current_steps": 36575, "total_steps": 38150, "loss": 0.5105, "lr": 2.5947492242896776e-07, "epoch": 9.587155963302752, "percentage": 95.87, "elapsed_time": "1:35:25", "remaining_time": "0:04:06", "throughput": 1711.04, "total_tokens": 9796848}
|
| 7335 |
+
{"current_steps": 36580, "total_steps": 38150, "loss": 0.2868, "lr": 2.5783395102977057e-07, "epoch": 9.588466579292268, "percentage": 95.88, "elapsed_time": "1:35:26", "remaining_time": "0:04:05", "throughput": 1711.04, "total_tokens": 9797904}
|
| 7336 |
+
{"current_steps": 36585, "total_steps": 38150, "loss": 0.503, "lr": 2.5619815812318037e-07, "epoch": 9.589777195281782, "percentage": 95.9, "elapsed_time": "1:35:26", "remaining_time": "0:04:04", "throughput": 1711.07, "total_tokens": 9799248}
|
| 7337 |
+
{"current_steps": 36590, "total_steps": 38150, "loss": 0.3767, "lr": 2.5456754405155934e-07, "epoch": 9.591087811271297, "percentage": 95.91, "elapsed_time": "1:35:27", "remaining_time": "0:04:04", "throughput": 1711.07, "total_tokens": 9800304}
|
| 7338 |
+
{"current_steps": 36595, "total_steps": 38150, "loss": 0.3537, "lr": 2.5294210915619564e-07, "epoch": 9.592398427260813, "percentage": 95.92, "elapsed_time": "1:35:28", "remaining_time": "0:04:03", "throughput": 1711.09, "total_tokens": 9801520}
|
| 7339 |
+
{"current_steps": 36600, "total_steps": 38150, "loss": 0.5526, "lr": 2.5132185377728656e-07, "epoch": 9.593709043250328, "percentage": 95.94, "elapsed_time": "1:35:28", "remaining_time": "0:04:02", "throughput": 1711.12, "total_tokens": 9802720}
|
| 7340 |
+
{"current_steps": 36605, "total_steps": 38150, "loss": 0.5554, "lr": 2.497067782539553e-07, "epoch": 9.595019659239842, "percentage": 95.95, "elapsed_time": "1:35:29", "remaining_time": "0:04:01", "throughput": 1711.19, "total_tokens": 9804384}
|
| 7341 |
+
{"current_steps": 36610, "total_steps": 38150, "loss": 0.4207, "lr": 2.480968829242286e-07, "epoch": 9.596330275229358, "percentage": 95.96, "elapsed_time": "1:35:30", "remaining_time": "0:04:01", "throughput": 1711.25, "total_tokens": 9805936}
|
| 7342 |
+
{"current_steps": 36615, "total_steps": 38150, "loss": 0.3299, "lr": 2.4649216812505373e-07, "epoch": 9.597640891218873, "percentage": 95.98, "elapsed_time": "1:35:30", "remaining_time": "0:04:00", "throughput": 1711.31, "total_tokens": 9807392}
|
| 7343 |
+
{"current_steps": 36620, "total_steps": 38150, "loss": 0.5301, "lr": 2.448926341923008e-07, "epoch": 9.598951507208389, "percentage": 95.99, "elapsed_time": "1:35:31", "remaining_time": "0:03:59", "throughput": 1711.33, "total_tokens": 9808624}
|
| 7344 |
+
{"current_steps": 36625, "total_steps": 38150, "loss": 0.4366, "lr": 2.4329828146074095e-07, "epoch": 9.600262123197902, "percentage": 96.0, "elapsed_time": "1:35:32", "remaining_time": "0:03:58", "throughput": 1711.35, "total_tokens": 9809808}
|
| 7345 |
+
{"current_steps": 36630, "total_steps": 38150, "loss": 0.3898, "lr": 2.417091102640795e-07, "epoch": 9.601572739187418, "percentage": 96.02, "elapsed_time": "1:35:32", "remaining_time": "0:03:57", "throughput": 1711.37, "total_tokens": 9810992}
|
| 7346 |
+
{"current_steps": 36635, "total_steps": 38150, "loss": 0.5057, "lr": 2.401251209349198e-07, "epoch": 9.602883355176933, "percentage": 96.03, "elapsed_time": "1:35:33", "remaining_time": "0:03:57", "throughput": 1711.41, "total_tokens": 9812416}
|
| 7347 |
+
{"current_steps": 36640, "total_steps": 38150, "loss": 0.4565, "lr": 2.385463138047911e-07, "epoch": 9.604193971166449, "percentage": 96.04, "elapsed_time": "1:35:34", "remaining_time": "0:03:56", "throughput": 1711.42, "total_tokens": 9813520}
|
| 7348 |
+
{"current_steps": 36645, "total_steps": 38150, "loss": 0.5264, "lr": 2.3697268920413462e-07, "epoch": 9.605504587155963, "percentage": 96.06, "elapsed_time": "1:35:34", "remaining_time": "0:03:55", "throughput": 1711.48, "total_tokens": 9814960}
|
| 7349 |
+
{"current_steps": 36650, "total_steps": 38150, "loss": 0.5389, "lr": 2.3540424746230916e-07, "epoch": 9.606815203145478, "percentage": 96.07, "elapsed_time": "1:35:35", "remaining_time": "0:03:54", "throughput": 1711.48, "total_tokens": 9816000}
|
| 7350 |
+
{"current_steps": 36655, "total_steps": 38150, "loss": 0.8096, "lr": 2.3384098890758267e-07, "epoch": 9.608125819134994, "percentage": 96.08, "elapsed_time": "1:35:36", "remaining_time": "0:03:53", "throughput": 1711.5, "total_tokens": 9817200}
|
| 7351 |
+
{"current_steps": 36660, "total_steps": 38150, "loss": 0.5564, "lr": 2.322829138671434e-07, "epoch": 9.609436435124508, "percentage": 96.09, "elapsed_time": "1:35:36", "remaining_time": "0:03:53", "throughput": 1711.51, "total_tokens": 9818336}
|
| 7352 |
+
{"current_steps": 36665, "total_steps": 38150, "loss": 0.6724, "lr": 2.3073002266709443e-07, "epoch": 9.610747051114023, "percentage": 96.11, "elapsed_time": "1:35:37", "remaining_time": "0:03:52", "throughput": 1711.58, "total_tokens": 9820016}
|
| 7353 |
+
{"current_steps": 36670, "total_steps": 38150, "loss": 0.5387, "lr": 2.2918231563245629e-07, "epoch": 9.612057667103539, "percentage": 96.12, "elapsed_time": "1:35:38", "remaining_time": "0:03:51", "throughput": 1711.58, "total_tokens": 9821072}
|
| 7354 |
+
{"current_steps": 36675, "total_steps": 38150, "loss": 0.282, "lr": 2.2763979308715877e-07, "epoch": 9.613368283093054, "percentage": 96.13, "elapsed_time": "1:35:38", "remaining_time": "0:03:50", "throughput": 1711.58, "total_tokens": 9822144}
|
| 7355 |
+
{"current_steps": 36680, "total_steps": 38150, "loss": 0.4783, "lr": 2.2610245535404638e-07, "epoch": 9.614678899082568, "percentage": 96.15, "elapsed_time": "1:35:39", "remaining_time": "0:03:50", "throughput": 1711.57, "total_tokens": 9823104}
|
| 7356 |
+
{"current_steps": 36685, "total_steps": 38150, "loss": 0.6589, "lr": 2.2457030275488676e-07, "epoch": 9.615989515072084, "percentage": 96.16, "elapsed_time": "1:35:39", "remaining_time": "0:03:49", "throughput": 1711.63, "total_tokens": 9824736}
|
| 7357 |
+
{"current_steps": 36690, "total_steps": 38150, "loss": 0.3905, "lr": 2.2304333561035396e-07, "epoch": 9.617300131061599, "percentage": 96.17, "elapsed_time": "1:35:40", "remaining_time": "0:03:48", "throughput": 1711.64, "total_tokens": 9825872}
|
| 7358 |
+
{"current_steps": 36695, "total_steps": 38150, "loss": 0.3989, "lr": 2.2152155424003952e-07, "epoch": 9.618610747051115, "percentage": 96.19, "elapsed_time": "1:35:41", "remaining_time": "0:03:47", "throughput": 1711.68, "total_tokens": 9827168}
|
| 7359 |
+
{"current_steps": 36700, "total_steps": 38150, "loss": 0.5941, "lr": 2.2000495896244976e-07, "epoch": 9.619921363040628, "percentage": 96.2, "elapsed_time": "1:35:41", "remaining_time": "0:03:46", "throughput": 1711.71, "total_tokens": 9828448}
|
| 7360 |
+
{"current_steps": 36705, "total_steps": 38150, "loss": 0.5156, "lr": 2.1849355009500582e-07, "epoch": 9.621231979030144, "percentage": 96.21, "elapsed_time": "1:35:42", "remaining_time": "0:03:46", "throughput": 1711.74, "total_tokens": 9829648}
|
| 7361 |
+
{"current_steps": 36710, "total_steps": 38150, "loss": 0.4565, "lr": 2.1698732795404074e-07, "epoch": 9.62254259501966, "percentage": 96.23, "elapsed_time": "1:35:43", "remaining_time": "0:03:45", "throughput": 1711.79, "total_tokens": 9831216}
|
| 7362 |
+
{"current_steps": 36715, "total_steps": 38150, "loss": 0.3016, "lr": 2.1548629285480792e-07, "epoch": 9.623853211009175, "percentage": 96.24, "elapsed_time": "1:35:43", "remaining_time": "0:03:44", "throughput": 1711.88, "total_tokens": 9832976}
|
| 7363 |
+
{"current_steps": 36720, "total_steps": 38150, "loss": 0.5339, "lr": 2.1399044511146993e-07, "epoch": 9.625163826998689, "percentage": 96.25, "elapsed_time": "1:35:44", "remaining_time": "0:03:43", "throughput": 1711.85, "total_tokens": 9833904}
|
| 7364 |
+
{"current_steps": 36725, "total_steps": 38150, "loss": 0.4297, "lr": 2.124997850371041e-07, "epoch": 9.626474442988204, "percentage": 96.26, "elapsed_time": "1:35:45", "remaining_time": "0:03:42", "throughput": 1711.85, "total_tokens": 9834944}
|
| 7365 |
+
{"current_steps": 36730, "total_steps": 38150, "loss": 0.2881, "lr": 2.1101431294370533e-07, "epoch": 9.62778505897772, "percentage": 96.28, "elapsed_time": "1:35:45", "remaining_time": "0:03:42", "throughput": 1711.83, "total_tokens": 9835920}
|
| 7366 |
+
{"current_steps": 36735, "total_steps": 38150, "loss": 0.5307, "lr": 2.0953402914217768e-07, "epoch": 9.629095674967235, "percentage": 96.29, "elapsed_time": "1:35:46", "remaining_time": "0:03:41", "throughput": 1711.87, "total_tokens": 9837264}
|
| 7367 |
+
{"current_steps": 36740, "total_steps": 38150, "loss": 0.7567, "lr": 2.0805893394234e-07, "epoch": 9.63040629095675, "percentage": 96.3, "elapsed_time": "1:35:47", "remaining_time": "0:03:40", "throughput": 1711.87, "total_tokens": 9838336}
|
| 7368 |
+
{"current_steps": 36745, "total_steps": 38150, "loss": 0.5949, "lr": 2.065890276529342e-07, "epoch": 9.631716906946265, "percentage": 96.32, "elapsed_time": "1:35:47", "remaining_time": "0:03:39", "throughput": 1711.87, "total_tokens": 9839424}
|
| 7369 |
+
{"current_steps": 36750, "total_steps": 38150, "loss": 0.4292, "lr": 2.051243105816031e-07, "epoch": 9.63302752293578, "percentage": 96.33, "elapsed_time": "1:35:48", "remaining_time": "0:03:38", "throughput": 1711.89, "total_tokens": 9840656}
|
| 7370 |
+
{"current_steps": 36755, "total_steps": 38150, "loss": 0.3512, "lr": 2.0366478303491255e-07, "epoch": 9.634338138925294, "percentage": 96.34, "elapsed_time": "1:35:49", "remaining_time": "0:03:38", "throughput": 1711.95, "total_tokens": 9842096}
|
| 7371 |
+
{"current_steps": 36760, "total_steps": 38150, "loss": 0.4569, "lr": 2.0221044531834043e-07, "epoch": 9.63564875491481, "percentage": 96.36, "elapsed_time": "1:35:49", "remaining_time": "0:03:37", "throughput": 1711.97, "total_tokens": 9843392}
|
| 7372 |
+
{"current_steps": 36765, "total_steps": 38150, "loss": 0.5385, "lr": 2.0076129773627105e-07, "epoch": 9.636959370904325, "percentage": 96.37, "elapsed_time": "1:35:50", "remaining_time": "0:03:36", "throughput": 1712.03, "total_tokens": 9844896}
|
| 7373 |
+
{"current_steps": 36770, "total_steps": 38150, "loss": 0.645, "lr": 1.9931734059201457e-07, "epoch": 9.63826998689384, "percentage": 96.38, "elapsed_time": "1:35:51", "remaining_time": "0:03:35", "throughput": 1712.04, "total_tokens": 9846048}
|
| 7374 |
+
{"current_steps": 36775, "total_steps": 38150, "loss": 0.5354, "lr": 1.9787857418778478e-07, "epoch": 9.639580602883354, "percentage": 96.4, "elapsed_time": "1:35:51", "remaining_time": "0:03:35", "throughput": 1712.08, "total_tokens": 9847360}
|
| 7375 |
+
{"current_steps": 36780, "total_steps": 38150, "loss": 0.6062, "lr": 1.9644499882471578e-07, "epoch": 9.64089121887287, "percentage": 96.41, "elapsed_time": "1:35:52", "remaining_time": "0:03:34", "throughput": 1712.12, "total_tokens": 9848672}
|
| 7376 |
+
{"current_steps": 36785, "total_steps": 38150, "loss": 0.5071, "lr": 1.9501661480285095e-07, "epoch": 9.642201834862385, "percentage": 96.42, "elapsed_time": "1:35:52", "remaining_time": "0:03:33", "throughput": 1712.13, "total_tokens": 9849776}
|
| 7377 |
+
{"current_steps": 36790, "total_steps": 38150, "loss": 0.4328, "lr": 1.935934224211483e-07, "epoch": 9.643512450851901, "percentage": 96.44, "elapsed_time": "1:35:53", "remaining_time": "0:03:32", "throughput": 1712.17, "total_tokens": 9851200}
|
| 7378 |
+
{"current_steps": 36795, "total_steps": 38150, "loss": 0.7537, "lr": 1.9217542197748074e-07, "epoch": 9.644823066841415, "percentage": 96.45, "elapsed_time": "1:35:54", "remaining_time": "0:03:31", "throughput": 1712.19, "total_tokens": 9852560}
|
| 7379 |
+
{"current_steps": 36800, "total_steps": 38150, "loss": 0.5039, "lr": 1.9076261376863303e-07, "epoch": 9.64613368283093, "percentage": 96.46, "elapsed_time": "1:35:55", "remaining_time": "0:03:31", "throughput": 1712.22, "total_tokens": 9853872}
|
| 7380 |
+
{"current_steps": 36805, "total_steps": 38150, "loss": 0.3572, "lr": 1.8935499809030476e-07, "epoch": 9.647444298820446, "percentage": 96.47, "elapsed_time": "1:35:55", "remaining_time": "0:03:30", "throughput": 1712.3, "total_tokens": 9855584}
|
| 7381 |
+
{"current_steps": 36810, "total_steps": 38150, "loss": 0.4215, "lr": 1.8795257523710197e-07, "epoch": 9.648754914809961, "percentage": 96.49, "elapsed_time": "1:35:56", "remaining_time": "0:03:29", "throughput": 1712.3, "total_tokens": 9856624}
|
| 7382 |
+
{"current_steps": 36815, "total_steps": 38150, "loss": 0.4103, "lr": 1.8655534550255648e-07, "epoch": 9.650065530799475, "percentage": 96.5, "elapsed_time": "1:35:57", "remaining_time": "0:03:28", "throughput": 1712.38, "total_tokens": 9858384}
|
| 7383 |
+
{"current_steps": 36820, "total_steps": 38150, "loss": 0.8813, "lr": 1.8516330917910108e-07, "epoch": 9.65137614678899, "percentage": 96.51, "elapsed_time": "1:35:57", "remaining_time": "0:03:27", "throughput": 1712.36, "total_tokens": 9859328}
|
| 7384 |
+
{"current_steps": 36825, "total_steps": 38150, "loss": 0.5863, "lr": 1.8377646655808877e-07, "epoch": 9.652686762778506, "percentage": 96.53, "elapsed_time": "1:35:58", "remaining_time": "0:03:27", "throughput": 1712.44, "total_tokens": 9861040}
|
| 7385 |
+
{"current_steps": 36830, "total_steps": 38150, "loss": 0.5199, "lr": 1.8239481792978464e-07, "epoch": 9.653997378768022, "percentage": 96.54, "elapsed_time": "1:35:59", "remaining_time": "0:03:26", "throughput": 1712.44, "total_tokens": 9862128}
|
| 7386 |
+
{"current_steps": 36835, "total_steps": 38150, "loss": 0.5259, "lr": 1.8101836358336287e-07, "epoch": 9.655307994757536, "percentage": 96.55, "elapsed_time": "1:35:59", "remaining_time": "0:03:25", "throughput": 1712.5, "total_tokens": 9863664}
|
| 7387 |
+
{"current_steps": 36840, "total_steps": 38150, "loss": 0.5546, "lr": 1.7964710380691251e-07, "epoch": 9.656618610747051, "percentage": 96.57, "elapsed_time": "1:36:00", "remaining_time": "0:03:24", "throughput": 1712.48, "total_tokens": 9864592}
|
| 7388 |
+
{"current_steps": 36845, "total_steps": 38150, "loss": 0.5533, "lr": 1.782810388874373e-07, "epoch": 9.657929226736567, "percentage": 96.58, "elapsed_time": "1:36:01", "remaining_time": "0:03:24", "throughput": 1712.57, "total_tokens": 9866448}
|
| 7389 |
+
{"current_steps": 36850, "total_steps": 38150, "loss": 0.4996, "lr": 1.7692016911085295e-07, "epoch": 9.65923984272608, "percentage": 96.59, "elapsed_time": "1:36:01", "remaining_time": "0:03:23", "throughput": 1712.64, "total_tokens": 9868144}
|
| 7390 |
+
{"current_steps": 36855, "total_steps": 38150, "loss": 0.5635, "lr": 1.7556449476198445e-07, "epoch": 9.660550458715596, "percentage": 96.61, "elapsed_time": "1:36:02", "remaining_time": "0:03:22", "throughput": 1712.6, "total_tokens": 9869312}
|
| 7391 |
+
{"current_steps": 36860, "total_steps": 38150, "loss": 0.3785, "lr": 1.7421401612457423e-07, "epoch": 9.661861074705111, "percentage": 96.62, "elapsed_time": "1:36:03", "remaining_time": "0:03:21", "throughput": 1712.6, "total_tokens": 9870368}
|
| 7392 |
+
{"current_steps": 36865, "total_steps": 38150, "loss": 0.3791, "lr": 1.7286873348127676e-07, "epoch": 9.663171690694627, "percentage": 96.63, "elapsed_time": "1:36:04", "remaining_time": "0:03:20", "throughput": 1712.61, "total_tokens": 9871520}
|
| 7393 |
+
{"current_steps": 36870, "total_steps": 38150, "loss": 0.6127, "lr": 1.715286471136529e-07, "epoch": 9.66448230668414, "percentage": 96.64, "elapsed_time": "1:36:04", "remaining_time": "0:03:20", "throughput": 1712.66, "total_tokens": 9872928}
|
| 7394 |
+
{"current_steps": 36875, "total_steps": 38150, "loss": 0.3197, "lr": 1.7019375730218667e-07, "epoch": 9.665792922673656, "percentage": 96.66, "elapsed_time": "1:36:05", "remaining_time": "0:03:19", "throughput": 1712.69, "total_tokens": 9874160}
|
| 7395 |
+
{"current_steps": 36880, "total_steps": 38150, "loss": 0.3932, "lr": 1.688640643262629e-07, "epoch": 9.667103538663172, "percentage": 96.67, "elapsed_time": "1:36:05", "remaining_time": "0:03:18", "throughput": 1712.71, "total_tokens": 9875328}
|
| 7396 |
+
{"current_steps": 36885, "total_steps": 38150, "loss": 0.3459, "lr": 1.6753956846418395e-07, "epoch": 9.668414154652687, "percentage": 96.68, "elapsed_time": "1:36:06", "remaining_time": "0:03:17", "throughput": 1712.73, "total_tokens": 9876624}
|
| 7397 |
+
{"current_steps": 36890, "total_steps": 38150, "loss": 0.6014, "lr": 1.6622026999316697e-07, "epoch": 9.669724770642201, "percentage": 96.7, "elapsed_time": "1:36:07", "remaining_time": "0:03:16", "throughput": 1712.77, "total_tokens": 9877984}
|
| 7398 |
+
{"current_steps": 36895, "total_steps": 38150, "loss": 0.333, "lr": 1.6490616918933554e-07, "epoch": 9.671035386631717, "percentage": 96.71, "elapsed_time": "1:36:07", "remaining_time": "0:03:16", "throughput": 1712.79, "total_tokens": 9879152}
|
| 7399 |
+
{"current_steps": 36900, "total_steps": 38150, "loss": 0.3574, "lr": 1.6359726632773353e-07, "epoch": 9.672346002621232, "percentage": 96.72, "elapsed_time": "1:36:08", "remaining_time": "0:03:15", "throughput": 1712.79, "total_tokens": 9880192}
|
| 7400 |
+
{"current_steps": 36905, "total_steps": 38150, "loss": 0.4911, "lr": 1.6229356168231125e-07, "epoch": 9.673656618610748, "percentage": 96.74, "elapsed_time": "1:36:09", "remaining_time": "0:03:14", "throughput": 1712.81, "total_tokens": 9881408}
|
| 7401 |
+
{"current_steps": 36910, "total_steps": 38150, "loss": 0.6371, "lr": 1.609950555259282e-07, "epoch": 9.674967234600262, "percentage": 96.75, "elapsed_time": "1:36:09", "remaining_time": "0:03:13", "throughput": 1712.89, "total_tokens": 9883088}
|
| 7402 |
+
{"current_steps": 36915, "total_steps": 38150, "loss": 0.338, "lr": 1.5970174813036144e-07, "epoch": 9.676277850589777, "percentage": 96.76, "elapsed_time": "1:36:10", "remaining_time": "0:03:13", "throughput": 1712.96, "total_tokens": 9884688}
|
| 7403 |
+
{"current_steps": 36920, "total_steps": 38150, "loss": 0.5053, "lr": 1.5841363976629998e-07, "epoch": 9.677588466579293, "percentage": 96.78, "elapsed_time": "1:36:11", "remaining_time": "0:03:12", "throughput": 1713.02, "total_tokens": 9886176}
|
| 7404 |
+
{"current_steps": 36925, "total_steps": 38150, "loss": 0.3, "lr": 1.5713073070333927e-07, "epoch": 9.678899082568808, "percentage": 96.79, "elapsed_time": "1:36:11", "remaining_time": "0:03:11", "throughput": 1713.02, "total_tokens": 9887232}
|
| 7405 |
+
{"current_steps": 36930, "total_steps": 38150, "loss": 0.4015, "lr": 1.5585302120998956e-07, "epoch": 9.680209698558322, "percentage": 96.8, "elapsed_time": "1:36:12", "remaining_time": "0:03:10", "throughput": 1713.08, "total_tokens": 9888784}
|
| 7406 |
+
{"current_steps": 36935, "total_steps": 38150, "loss": 0.3544, "lr": 1.545805115536786e-07, "epoch": 9.681520314547837, "percentage": 96.82, "elapsed_time": "1:36:13", "remaining_time": "0:03:09", "throughput": 1713.09, "total_tokens": 9889888}
|
| 7407 |
+
{"current_steps": 36940, "total_steps": 38150, "loss": 0.4375, "lr": 1.5331320200073497e-07, "epoch": 9.682830930537353, "percentage": 96.83, "elapsed_time": "1:36:13", "remaining_time": "0:03:09", "throughput": 1713.14, "total_tokens": 9891424}
|
| 7408 |
+
{"current_steps": 36945, "total_steps": 38150, "loss": 0.3923, "lr": 1.520510928164076e-07, "epoch": 9.684141546526867, "percentage": 96.84, "elapsed_time": "1:36:14", "remaining_time": "0:03:08", "throughput": 1713.14, "total_tokens": 9892464}
|
| 7409 |
+
{"current_steps": 36950, "total_steps": 38150, "loss": 0.4986, "lr": 1.5079418426485193e-07, "epoch": 9.685452162516382, "percentage": 96.85, "elapsed_time": "1:36:15", "remaining_time": "0:03:07", "throughput": 1713.15, "total_tokens": 9893632}
|
| 7410 |
+
{"current_steps": 36955, "total_steps": 38150, "loss": 0.3674, "lr": 1.49542476609138e-07, "epoch": 9.686762778505898, "percentage": 96.87, "elapsed_time": "1:36:15", "remaining_time": "0:03:06", "throughput": 1713.2, "total_tokens": 9895056}
|
| 7411 |
+
{"current_steps": 36960, "total_steps": 38150, "loss": 0.432, "lr": 1.482959701112452e-07, "epoch": 9.688073394495413, "percentage": 96.88, "elapsed_time": "1:36:16", "remaining_time": "0:03:05", "throughput": 1713.23, "total_tokens": 9896400}
|
| 7412 |
+
{"current_steps": 36965, "total_steps": 38150, "loss": 0.4488, "lr": 1.47054665032062e-07, "epoch": 9.689384010484927, "percentage": 96.89, "elapsed_time": "1:36:17", "remaining_time": "0:03:05", "throughput": 1713.28, "total_tokens": 9897920}
|
| 7413 |
+
{"current_steps": 36970, "total_steps": 38150, "loss": 0.5548, "lr": 1.4581856163140008e-07, "epoch": 9.690694626474443, "percentage": 96.91, "elapsed_time": "1:36:17", "remaining_time": "0:03:04", "throughput": 1713.38, "total_tokens": 9899696}
|
| 7414 |
+
{"current_steps": 36975, "total_steps": 38150, "loss": 0.4107, "lr": 1.4458766016796632e-07, "epoch": 9.692005242463958, "percentage": 96.92, "elapsed_time": "1:36:18", "remaining_time": "0:03:03", "throughput": 1713.43, "total_tokens": 9901232}
|
| 7415 |
+
{"current_steps": 36980, "total_steps": 38150, "loss": 0.3285, "lr": 1.4336196089938802e-07, "epoch": 9.693315858453474, "percentage": 96.93, "elapsed_time": "1:36:19", "remaining_time": "0:03:02", "throughput": 1713.49, "total_tokens": 9902736}
|
| 7416 |
+
{"current_steps": 36985, "total_steps": 38150, "loss": 0.4428, "lr": 1.4214146408220163e-07, "epoch": 9.694626474442988, "percentage": 96.95, "elapsed_time": "1:36:19", "remaining_time": "0:03:02", "throughput": 1713.56, "total_tokens": 9904272}
|
| 7417 |
+
{"current_steps": 36990, "total_steps": 38150, "loss": 0.2677, "lr": 1.4092616997185837e-07, "epoch": 9.695937090432503, "percentage": 96.96, "elapsed_time": "1:36:20", "remaining_time": "0:03:01", "throughput": 1713.55, "total_tokens": 9905376}
|
| 7418 |
+
{"current_steps": 36995, "total_steps": 38150, "loss": 0.4453, "lr": 1.397160788227131e-07, "epoch": 9.697247706422019, "percentage": 96.97, "elapsed_time": "1:36:21", "remaining_time": "0:03:00", "throughput": 1713.54, "total_tokens": 9906368}
|
| 7419 |
+
{"current_steps": 37000, "total_steps": 38150, "loss": 0.4303, "lr": 1.385111908880382e-07, "epoch": 9.698558322411534, "percentage": 96.99, "elapsed_time": "1:36:21", "remaining_time": "0:02:59", "throughput": 1713.59, "total_tokens": 9907760}
|
| 7420 |
+
{"current_steps": 37005, "total_steps": 38150, "loss": 0.613, "lr": 1.3731150642001255e-07, "epoch": 9.699868938401048, "percentage": 97.0, "elapsed_time": "1:36:22", "remaining_time": "0:02:58", "throughput": 1713.65, "total_tokens": 9909280}
|
| 7421 |
+
{"current_steps": 37010, "total_steps": 38150, "loss": 0.517, "lr": 1.3611702566972694e-07, "epoch": 9.701179554390563, "percentage": 97.01, "elapsed_time": "1:36:23", "remaining_time": "0:02:58", "throughput": 1713.67, "total_tokens": 9910608}
|
| 7422 |
+
{"current_steps": 37015, "total_steps": 38150, "loss": 0.9993, "lr": 1.3492774888718974e-07, "epoch": 9.702490170380079, "percentage": 97.02, "elapsed_time": "1:36:23", "remaining_time": "0:02:57", "throughput": 1713.66, "total_tokens": 9911600}
|
| 7423 |
+
{"current_steps": 37020, "total_steps": 38150, "loss": 0.5074, "lr": 1.3374367632131014e-07, "epoch": 9.703800786369595, "percentage": 97.04, "elapsed_time": "1:36:24", "remaining_time": "0:02:56", "throughput": 1713.69, "total_tokens": 9912944}
|
| 7424 |
+
{"current_steps": 37025, "total_steps": 38150, "loss": 0.6065, "lr": 1.3256480821991213e-07, "epoch": 9.705111402359108, "percentage": 97.05, "elapsed_time": "1:36:25", "remaining_time": "0:02:55", "throughput": 1713.69, "total_tokens": 9914080}
|
| 7425 |
+
{"current_steps": 37030, "total_steps": 38150, "loss": 0.4257, "lr": 1.3139114482973448e-07, "epoch": 9.706422018348624, "percentage": 97.06, "elapsed_time": "1:36:25", "remaining_time": "0:02:54", "throughput": 1713.68, "total_tokens": 9915088}
|
| 7426 |
+
{"current_steps": 37035, "total_steps": 38150, "loss": 0.3233, "lr": 1.3022268639641956e-07, "epoch": 9.70773263433814, "percentage": 97.08, "elapsed_time": "1:36:26", "remaining_time": "0:02:54", "throughput": 1713.69, "total_tokens": 9916208}
|
| 7427 |
+
{"current_steps": 37040, "total_steps": 38150, "loss": 0.443, "lr": 1.290594331645245e-07, "epoch": 9.709043250327653, "percentage": 97.09, "elapsed_time": "1:36:27", "remaining_time": "0:02:53", "throughput": 1713.71, "total_tokens": 9917360}
|
| 7428 |
+
{"current_steps": 37045, "total_steps": 38150, "loss": 0.5076, "lr": 1.279013853775185e-07, "epoch": 9.710353866317169, "percentage": 97.1, "elapsed_time": "1:36:27", "remaining_time": "0:02:52", "throughput": 1713.75, "total_tokens": 9918784}
|
| 7429 |
+
{"current_steps": 37050, "total_steps": 38150, "loss": 0.2904, "lr": 1.267485432777743e-07, "epoch": 9.711664482306684, "percentage": 97.12, "elapsed_time": "1:36:28", "remaining_time": "0:02:51", "throughput": 1713.82, "total_tokens": 9920272}
|
| 7430 |
+
{"current_steps": 37055, "total_steps": 38150, "loss": 0.4639, "lr": 1.2560090710658223e-07, "epoch": 9.7129750982962, "percentage": 97.13, "elapsed_time": "1:36:29", "remaining_time": "0:02:51", "throughput": 1713.88, "total_tokens": 9921792}
|
| 7431 |
+
{"current_steps": 37060, "total_steps": 38150, "loss": 0.5242, "lr": 1.2445847710414183e-07, "epoch": 9.714285714285714, "percentage": 97.14, "elapsed_time": "1:36:29", "remaining_time": "0:02:50", "throughput": 1713.89, "total_tokens": 9922992}
|
| 7432 |
+
{"current_steps": 37065, "total_steps": 38150, "loss": 0.4581, "lr": 1.233212535095618e-07, "epoch": 9.715596330275229, "percentage": 97.16, "elapsed_time": "1:36:30", "remaining_time": "0:02:49", "throughput": 1713.91, "total_tokens": 9924256}
|
| 7433 |
+
{"current_steps": 37070, "total_steps": 38150, "loss": 0.5968, "lr": 1.2218923656085735e-07, "epoch": 9.716906946264745, "percentage": 97.17, "elapsed_time": "1:36:31", "remaining_time": "0:02:48", "throughput": 1713.9, "total_tokens": 9925184}
|
| 7434 |
+
{"current_steps": 37075, "total_steps": 38150, "loss": 0.5842, "lr": 1.2106242649496112e-07, "epoch": 9.71821756225426, "percentage": 97.18, "elapsed_time": "1:36:31", "remaining_time": "0:02:47", "throughput": 1713.94, "total_tokens": 9926528}
|
| 7435 |
+
{"current_steps": 37080, "total_steps": 38150, "loss": 0.3363, "lr": 1.199408235477123e-07, "epoch": 9.719528178243774, "percentage": 97.2, "elapsed_time": "1:36:32", "remaining_time": "0:02:47", "throughput": 1713.97, "total_tokens": 9927920}
|
| 7436 |
+
{"current_steps": 37085, "total_steps": 38150, "loss": 0.4191, "lr": 1.188244279538564e-07, "epoch": 9.72083879423329, "percentage": 97.21, "elapsed_time": "1:36:33", "remaining_time": "0:02:46", "throughput": 1714.02, "total_tokens": 9929344}
|
| 7437 |
+
{"current_steps": 37090, "total_steps": 38150, "loss": 0.3482, "lr": 1.1771323994705929e-07, "epoch": 9.722149410222805, "percentage": 97.22, "elapsed_time": "1:36:33", "remaining_time": "0:02:45", "throughput": 1714.03, "total_tokens": 9930560}
|
| 7438 |
+
{"current_steps": 37095, "total_steps": 38150, "loss": 0.3133, "lr": 1.1660725975988773e-07, "epoch": 9.72346002621232, "percentage": 97.23, "elapsed_time": "1:36:34", "remaining_time": "0:02:44", "throughput": 1714.04, "total_tokens": 9931680}
|
| 7439 |
+
{"current_steps": 37100, "total_steps": 38150, "loss": 0.467, "lr": 1.1550648762382044e-07, "epoch": 9.724770642201834, "percentage": 97.25, "elapsed_time": "1:36:34", "remaining_time": "0:02:44", "throughput": 1714.03, "total_tokens": 9932704}
|
| 7440 |
+
{"current_steps": 37105, "total_steps": 38150, "loss": 0.5936, "lr": 1.144109237692509e-07, "epoch": 9.72608125819135, "percentage": 97.26, "elapsed_time": "1:36:35", "remaining_time": "0:02:43", "throughput": 1714.04, "total_tokens": 9933776}
|
| 7441 |
+
{"current_steps": 37110, "total_steps": 38150, "loss": 0.3316, "lr": 1.1332056842547344e-07, "epoch": 9.727391874180865, "percentage": 97.27, "elapsed_time": "1:36:36", "remaining_time": "0:02:42", "throughput": 1714.1, "total_tokens": 9935376}
|
| 7442 |
+
{"current_steps": 37115, "total_steps": 38150, "loss": 0.3898, "lr": 1.1223542182070002e-07, "epoch": 9.728702490170381, "percentage": 97.29, "elapsed_time": "1:36:36", "remaining_time": "0:02:41", "throughput": 1714.13, "total_tokens": 9936624}
|
| 7443 |
+
{"current_steps": 37120, "total_steps": 38150, "loss": 0.4055, "lr": 1.1115548418205168e-07, "epoch": 9.730013106159895, "percentage": 97.3, "elapsed_time": "1:36:37", "remaining_time": "0:02:40", "throughput": 1714.14, "total_tokens": 9937744}
|
| 7444 |
+
{"current_steps": 37125, "total_steps": 38150, "loss": 0.4577, "lr": 1.1008075573555599e-07, "epoch": 9.73132372214941, "percentage": 97.31, "elapsed_time": "1:36:38", "remaining_time": "0:02:40", "throughput": 1714.16, "total_tokens": 9939040}
|
| 7445 |
+
{"current_steps": 37130, "total_steps": 38150, "loss": 0.3461, "lr": 1.0901123670614965e-07, "epoch": 9.732634338138926, "percentage": 97.33, "elapsed_time": "1:36:38", "remaining_time": "0:02:39", "throughput": 1714.19, "total_tokens": 9940352}
|
| 7446 |
+
{"current_steps": 37135, "total_steps": 38150, "loss": 0.461, "lr": 1.0794692731768419e-07, "epoch": 9.73394495412844, "percentage": 97.34, "elapsed_time": "1:36:39", "remaining_time": "0:02:38", "throughput": 1714.21, "total_tokens": 9941552}
|
| 7447 |
+
{"current_steps": 37140, "total_steps": 38150, "loss": 0.4573, "lr": 1.0688782779291473e-07, "epoch": 9.735255570117955, "percentage": 97.35, "elapsed_time": "1:36:40", "remaining_time": "0:02:37", "throughput": 1714.26, "total_tokens": 9943088}
|
| 7448 |
+
{"current_steps": 37145, "total_steps": 38150, "loss": 0.4006, "lr": 1.0583393835351396e-07, "epoch": 9.73656618610747, "percentage": 97.37, "elapsed_time": "1:36:40", "remaining_time": "0:02:36", "throughput": 1714.27, "total_tokens": 9944192}
|
| 7449 |
+
{"current_steps": 37150, "total_steps": 38150, "loss": 0.3065, "lr": 1.0478525922005545e-07, "epoch": 9.737876802096986, "percentage": 97.38, "elapsed_time": "1:36:41", "remaining_time": "0:02:36", "throughput": 1714.35, "total_tokens": 9945936}
|
| 7450 |
+
{"current_steps": 37155, "total_steps": 38150, "loss": 0.4585, "lr": 1.0374179061202749e-07, "epoch": 9.7391874180865, "percentage": 97.39, "elapsed_time": "1:36:42", "remaining_time": "0:02:35", "throughput": 1714.41, "total_tokens": 9947504}
|
| 7451 |
+
{"current_steps": 37160, "total_steps": 38150, "loss": 0.2372, "lr": 1.0270353274782207e-07, "epoch": 9.740498034076015, "percentage": 97.4, "elapsed_time": "1:36:42", "remaining_time": "0:02:34", "throughput": 1714.43, "total_tokens": 9948656}
|
| 7452 |
+
{"current_steps": 37165, "total_steps": 38150, "loss": 0.3198, "lr": 1.0167048584475147e-07, "epoch": 9.741808650065531, "percentage": 97.42, "elapsed_time": "1:36:43", "remaining_time": "0:02:33", "throughput": 1714.55, "total_tokens": 9950832}
|
| 7453 |
+
{"current_steps": 37170, "total_steps": 38150, "loss": 0.419, "lr": 1.006426501190233e-07, "epoch": 9.743119266055047, "percentage": 97.43, "elapsed_time": "1:36:44", "remaining_time": "0:02:33", "throughput": 1714.65, "total_tokens": 9952656}
|
| 7454 |
+
{"current_steps": 37175, "total_steps": 38150, "loss": 0.3943, "lr": 9.962002578576823e-08, "epoch": 9.74442988204456, "percentage": 97.44, "elapsed_time": "1:36:45", "remaining_time": "0:02:32", "throughput": 1714.64, "total_tokens": 9953648}
|
| 7455 |
+
{"current_steps": 37180, "total_steps": 38150, "loss": 0.6126, "lr": 9.860261305901785e-08, "epoch": 9.745740498034076, "percentage": 97.46, "elapsed_time": "1:36:45", "remaining_time": "0:02:31", "throughput": 1714.64, "total_tokens": 9954720}
|
| 7456 |
+
{"current_steps": 37185, "total_steps": 38150, "loss": 0.6923, "lr": 9.759041215171295e-08, "epoch": 9.747051114023591, "percentage": 97.47, "elapsed_time": "1:36:46", "remaining_time": "0:02:30", "throughput": 1714.64, "total_tokens": 9955776}
|
| 7457 |
+
{"current_steps": 37190, "total_steps": 38150, "loss": 0.5265, "lr": 9.658342327570902e-08, "epoch": 9.748361730013107, "percentage": 97.48, "elapsed_time": "1:36:47", "remaining_time": "0:02:29", "throughput": 1714.66, "total_tokens": 9957072}
|
| 7458 |
+
{"current_steps": 37195, "total_steps": 38150, "loss": 0.3366, "lr": 9.55816466417625e-08, "epoch": 9.74967234600262, "percentage": 97.5, "elapsed_time": "1:36:47", "remaining_time": "0:02:29", "throughput": 1714.71, "total_tokens": 9958464}
|
| 7459 |
+
{"current_steps": 37200, "total_steps": 38150, "loss": 0.5896, "lr": 9.458508245954456e-08, "epoch": 9.750982961992136, "percentage": 97.51, "elapsed_time": "1:36:48", "remaining_time": "0:02:28", "throughput": 1714.75, "total_tokens": 9959824}
|
| 7460 |
+
{"current_steps": 37205, "total_steps": 38150, "loss": 0.5354, "lr": 9.359373093763835e-08, "epoch": 9.752293577981652, "percentage": 97.52, "elapsed_time": "1:36:48", "remaining_time": "0:02:27", "throughput": 1714.8, "total_tokens": 9961232}
|
| 7461 |
+
{"current_steps": 37210, "total_steps": 38150, "loss": 0.5544, "lr": 9.260759228352789e-08, "epoch": 9.753604193971167, "percentage": 97.54, "elapsed_time": "1:36:49", "remaining_time": "0:02:26", "throughput": 1714.89, "total_tokens": 9963040}
|
| 7462 |
+
{"current_steps": 37215, "total_steps": 38150, "loss": 0.4281, "lr": 9.162666670361198e-08, "epoch": 9.754914809960681, "percentage": 97.55, "elapsed_time": "1:36:50", "remaining_time": "0:02:25", "throughput": 1714.91, "total_tokens": 9964192}
|
| 7463 |
+
{"current_steps": 37220, "total_steps": 38150, "loss": 0.7711, "lr": 9.065095440319582e-08, "epoch": 9.756225425950197, "percentage": 97.56, "elapsed_time": "1:36:51", "remaining_time": "0:02:25", "throughput": 1714.96, "total_tokens": 9965648}
|
| 7464 |
+
{"current_steps": 37225, "total_steps": 38150, "loss": 0.373, "lr": 8.968045558649663e-08, "epoch": 9.757536041939712, "percentage": 97.58, "elapsed_time": "1:36:51", "remaining_time": "0:02:24", "throughput": 1715.02, "total_tokens": 9967216}
|
| 7465 |
+
{"current_steps": 37230, "total_steps": 38150, "loss": 0.4866, "lr": 8.871517045663524e-08, "epoch": 9.758846657929226, "percentage": 97.59, "elapsed_time": "1:36:52", "remaining_time": "0:02:23", "throughput": 1715.09, "total_tokens": 9968896}
|
| 7466 |
+
{"current_steps": 37235, "total_steps": 38150, "loss": 0.4039, "lr": 8.775509921564728e-08, "epoch": 9.760157273918741, "percentage": 97.6, "elapsed_time": "1:36:53", "remaining_time": "0:02:22", "throughput": 1715.17, "total_tokens": 9970576}
|
| 7467 |
+
{"current_steps": 37240, "total_steps": 38150, "loss": 0.6578, "lr": 8.680024206446924e-08, "epoch": 9.761467889908257, "percentage": 97.61, "elapsed_time": "1:36:53", "remaining_time": "0:02:22", "throughput": 1715.23, "total_tokens": 9972080}
|
| 7468 |
+
{"current_steps": 37245, "total_steps": 38150, "loss": 0.4219, "lr": 8.585059920295512e-08, "epoch": 9.762778505897773, "percentage": 97.63, "elapsed_time": "1:36:54", "remaining_time": "0:02:21", "throughput": 1715.28, "total_tokens": 9973536}
|
| 7469 |
+
{"current_steps": 37250, "total_steps": 38150, "loss": 0.3138, "lr": 8.490617082985986e-08, "epoch": 9.764089121887286, "percentage": 97.64, "elapsed_time": "1:36:55", "remaining_time": "0:02:20", "throughput": 1715.27, "total_tokens": 9974592}
|
| 7470 |
+
{"current_steps": 37255, "total_steps": 38150, "loss": 0.418, "lr": 8.396695714285585e-08, "epoch": 9.765399737876802, "percentage": 97.65, "elapsed_time": "1:36:55", "remaining_time": "0:02:19", "throughput": 1715.28, "total_tokens": 9975760}
|
| 7471 |
+
{"current_steps": 37260, "total_steps": 38150, "loss": 0.3359, "lr": 8.303295833851365e-08, "epoch": 9.766710353866317, "percentage": 97.67, "elapsed_time": "1:36:56", "remaining_time": "0:02:18", "throughput": 1715.28, "total_tokens": 9976832}
|
| 7472 |
+
{"current_steps": 37265, "total_steps": 38150, "loss": 0.8076, "lr": 8.210417461232412e-08, "epoch": 9.768020969855833, "percentage": 97.68, "elapsed_time": "1:36:57", "remaining_time": "0:02:18", "throughput": 1715.33, "total_tokens": 9978224}
|
| 7473 |
+
{"current_steps": 37270, "total_steps": 38150, "loss": 0.4236, "lr": 8.11806061586734e-08, "epoch": 9.769331585845347, "percentage": 97.69, "elapsed_time": "1:36:57", "remaining_time": "0:02:17", "throughput": 1715.37, "total_tokens": 9979600}
|
| 7474 |
+
{"current_steps": 37275, "total_steps": 38150, "loss": 0.7952, "lr": 8.026225317086522e-08, "epoch": 9.770642201834862, "percentage": 97.71, "elapsed_time": "1:36:58", "remaining_time": "0:02:16", "throughput": 1715.42, "total_tokens": 9981040}
|
| 7475 |
+
{"current_steps": 37280, "total_steps": 38150, "loss": 0.3827, "lr": 7.934911584110971e-08, "epoch": 9.771952817824378, "percentage": 97.72, "elapsed_time": "1:36:59", "remaining_time": "0:02:15", "throughput": 1715.44, "total_tokens": 9982288}
|
| 7476 |
+
{"current_steps": 37285, "total_steps": 38150, "loss": 0.4753, "lr": 7.844119436052622e-08, "epoch": 9.773263433813893, "percentage": 97.73, "elapsed_time": "1:36:59", "remaining_time": "0:02:15", "throughput": 1715.44, "total_tokens": 9983424}
|
| 7477 |
+
{"current_steps": 37290, "total_steps": 38150, "loss": 0.4159, "lr": 7.753848891913772e-08, "epoch": 9.774574049803407, "percentage": 97.75, "elapsed_time": "1:37:00", "remaining_time": "0:02:14", "throughput": 1715.45, "total_tokens": 9984528}
|
| 7478 |
+
{"current_steps": 37295, "total_steps": 38150, "loss": 0.3387, "lr": 7.664099970588201e-08, "epoch": 9.775884665792923, "percentage": 97.76, "elapsed_time": "1:37:00", "remaining_time": "0:02:13", "throughput": 1715.49, "total_tokens": 9985840}
|
| 7479 |
+
{"current_steps": 37300, "total_steps": 38150, "loss": 0.3063, "lr": 7.57487269085977e-08, "epoch": 9.777195281782438, "percentage": 97.77, "elapsed_time": "1:37:01", "remaining_time": "0:02:12", "throughput": 1715.54, "total_tokens": 9987248}
|
| 7480 |
+
{"current_steps": 37305, "total_steps": 38150, "loss": 0.3989, "lr": 7.486167071404371e-08, "epoch": 9.778505897771954, "percentage": 97.79, "elapsed_time": "1:37:02", "remaining_time": "0:02:11", "throughput": 1715.52, "total_tokens": 9988240}
|
| 7481 |
+
{"current_steps": 37310, "total_steps": 38150, "loss": 0.3123, "lr": 7.397983130787156e-08, "epoch": 9.779816513761467, "percentage": 97.8, "elapsed_time": "1:37:02", "remaining_time": "0:02:11", "throughput": 1715.56, "total_tokens": 9989536}
|
| 7482 |
+
{"current_steps": 37315, "total_steps": 38150, "loss": 0.4459, "lr": 7.310320887465305e-08, "epoch": 9.781127129750983, "percentage": 97.81, "elapsed_time": "1:37:03", "remaining_time": "0:02:10", "throughput": 1715.56, "total_tokens": 9990608}
|
| 7483 |
+
{"current_steps": 37320, "total_steps": 38150, "loss": 0.3141, "lr": 7.223180359786086e-08, "epoch": 9.782437745740499, "percentage": 97.82, "elapsed_time": "1:37:04", "remaining_time": "0:02:09", "throughput": 1715.61, "total_tokens": 9992048}
|
| 7484 |
+
{"current_steps": 37325, "total_steps": 38150, "loss": 0.5881, "lr": 7.136561565988242e-08, "epoch": 9.783748361730012, "percentage": 97.84, "elapsed_time": "1:37:04", "remaining_time": "0:02:08", "throughput": 1715.63, "total_tokens": 9993328}
|
| 7485 |
+
{"current_steps": 37330, "total_steps": 38150, "loss": 0.5805, "lr": 7.050464524200607e-08, "epoch": 9.785058977719528, "percentage": 97.85, "elapsed_time": "1:37:05", "remaining_time": "0:02:07", "throughput": 1715.66, "total_tokens": 9994560}
|
| 7486 |
+
{"current_steps": 37335, "total_steps": 38150, "loss": 0.5419, "lr": 6.964889252443485e-08, "epoch": 9.786369593709043, "percentage": 97.86, "elapsed_time": "1:37:06", "remaining_time": "0:02:07", "throughput": 1715.74, "total_tokens": 9996320}
|
| 7487 |
+
{"current_steps": 37340, "total_steps": 38150, "loss": 0.278, "lr": 6.879835768627274e-08, "epoch": 9.787680209698559, "percentage": 97.88, "elapsed_time": "1:37:06", "remaining_time": "0:02:06", "throughput": 1715.79, "total_tokens": 9997712}
|
| 7488 |
+
{"current_steps": 37345, "total_steps": 38150, "loss": 0.4306, "lr": 6.795304090553567e-08, "epoch": 9.788990825688073, "percentage": 97.89, "elapsed_time": "1:37:07", "remaining_time": "0:02:05", "throughput": 1715.82, "total_tokens": 9998960}
|
| 7489 |
+
{"current_steps": 37350, "total_steps": 38150, "loss": 0.4672, "lr": 6.711294235914877e-08, "epoch": 9.790301441677588, "percentage": 97.9, "elapsed_time": "1:37:08", "remaining_time": "0:02:04", "throughput": 1715.88, "total_tokens": 10000592}
|
| 7490 |
+
{"current_steps": 37355, "total_steps": 38150, "loss": 0.4097, "lr": 6.627806222294086e-08, "epoch": 9.791612057667104, "percentage": 97.92, "elapsed_time": "1:37:08", "remaining_time": "0:02:04", "throughput": 1715.94, "total_tokens": 10002080}
|
| 7491 |
+
{"current_steps": 37360, "total_steps": 38150, "loss": 0.4745, "lr": 6.544840067165548e-08, "epoch": 9.79292267365662, "percentage": 97.93, "elapsed_time": "1:37:09", "remaining_time": "0:02:03", "throughput": 1715.97, "total_tokens": 10003392}
|
| 7492 |
+
{"current_steps": 37365, "total_steps": 38150, "loss": 0.4986, "lr": 6.462395787893427e-08, "epoch": 9.794233289646133, "percentage": 97.94, "elapsed_time": "1:37:10", "remaining_time": "0:02:02", "throughput": 1716.02, "total_tokens": 10004928}
|
| 7493 |
+
{"current_steps": 37370, "total_steps": 38150, "loss": 0.4327, "lr": 6.380473401733366e-08, "epoch": 9.795543905635649, "percentage": 97.96, "elapsed_time": "1:37:10", "remaining_time": "0:02:01", "throughput": 1716.01, "total_tokens": 10005904}
|
| 7494 |
+
{"current_steps": 37375, "total_steps": 38150, "loss": 0.6154, "lr": 6.299072925831373e-08, "epoch": 9.796854521625164, "percentage": 97.97, "elapsed_time": "1:37:11", "remaining_time": "0:02:00", "throughput": 1716.0, "total_tokens": 10006944}
|
| 7495 |
+
{"current_steps": 37380, "total_steps": 38150, "loss": 0.5157, "lr": 6.218194377224928e-08, "epoch": 9.79816513761468, "percentage": 97.98, "elapsed_time": "1:37:12", "remaining_time": "0:02:00", "throughput": 1716.05, "total_tokens": 10008320}
|
| 7496 |
+
{"current_steps": 37385, "total_steps": 38150, "loss": 0.4487, "lr": 6.137837772841326e-08, "epoch": 9.799475753604193, "percentage": 97.99, "elapsed_time": "1:37:12", "remaining_time": "0:01:59", "throughput": 1716.07, "total_tokens": 10009504}
|
| 7497 |
+
{"current_steps": 37390, "total_steps": 38150, "loss": 0.3696, "lr": 6.058003129499334e-08, "epoch": 9.800786369593709, "percentage": 98.01, "elapsed_time": "1:37:13", "remaining_time": "0:01:58", "throughput": 1716.11, "total_tokens": 10010896}
|
| 7498 |
+
{"current_steps": 37395, "total_steps": 38150, "loss": 0.3567, "lr": 5.978690463908088e-08, "epoch": 9.802096985583225, "percentage": 98.02, "elapsed_time": "1:37:14", "remaining_time": "0:01:57", "throughput": 1716.11, "total_tokens": 10012048}
|
| 7499 |
+
{"current_steps": 37400, "total_steps": 38150, "loss": 0.5638, "lr": 5.8998997926676405e-08, "epoch": 9.80340760157274, "percentage": 98.03, "elapsed_time": "1:37:14", "remaining_time": "0:01:57", "throughput": 1716.13, "total_tokens": 10013200}
|
| 7500 |
+
{"current_steps": 37405, "total_steps": 38150, "loss": 0.6094, "lr": 5.821631132268412e-08, "epoch": 9.804718217562254, "percentage": 98.05, "elapsed_time": "1:37:15", "remaining_time": "0:01:56", "throughput": 1716.13, "total_tokens": 10014320}
|
| 7501 |
+
{"current_steps": 37410, "total_steps": 38150, "loss": 0.4961, "lr": 5.743884499092578e-08, "epoch": 9.80602883355177, "percentage": 98.06, "elapsed_time": "1:37:16", "remaining_time": "0:01:55", "throughput": 1716.22, "total_tokens": 10016336}
|
| 7502 |
+
{"current_steps": 37415, "total_steps": 38150, "loss": 0.2503, "lr": 5.6666599094115646e-08, "epoch": 9.807339449541285, "percentage": 98.07, "elapsed_time": "1:37:16", "remaining_time": "0:01:54", "throughput": 1716.27, "total_tokens": 10017728}
|
| 7503 |
+
{"current_steps": 37420, "total_steps": 38150, "loss": 0.5375, "lr": 5.58995737938911e-08, "epoch": 9.808650065530799, "percentage": 98.09, "elapsed_time": "1:37:17", "remaining_time": "0:01:53", "throughput": 1716.29, "total_tokens": 10018992}
|
| 7504 |
+
{"current_steps": 37425, "total_steps": 38150, "loss": 0.5454, "lr": 5.513776925078207e-08, "epoch": 9.809960681520314, "percentage": 98.1, "elapsed_time": "1:37:18", "remaining_time": "0:01:53", "throughput": 1716.33, "total_tokens": 10020400}
|
| 7505 |
+
{"current_steps": 37430, "total_steps": 38150, "loss": 0.4164, "lr": 5.4381185624238776e-08, "epoch": 9.81127129750983, "percentage": 98.11, "elapsed_time": "1:37:18", "remaining_time": "0:01:52", "throughput": 1716.32, "total_tokens": 10021392}
|
| 7506 |
+
{"current_steps": 37435, "total_steps": 38150, "loss": 0.4264, "lr": 5.362982307261233e-08, "epoch": 9.812581913499345, "percentage": 98.13, "elapsed_time": "1:37:19", "remaining_time": "0:01:51", "throughput": 1716.36, "total_tokens": 10022784}
|
| 7507 |
+
{"current_steps": 37440, "total_steps": 38150, "loss": 0.8296, "lr": 5.2883681753157497e-08, "epoch": 9.813892529488859, "percentage": 98.14, "elapsed_time": "1:37:20", "remaining_time": "0:01:50", "throughput": 1716.37, "total_tokens": 10023888}
|
| 7508 |
+
{"current_steps": 37445, "total_steps": 38150, "loss": 0.3616, "lr": 5.214276182204381e-08, "epoch": 9.815203145478375, "percentage": 98.15, "elapsed_time": "1:37:20", "remaining_time": "0:01:49", "throughput": 1716.42, "total_tokens": 10025376}
|
| 7509 |
+
{"current_steps": 37450, "total_steps": 38150, "loss": 0.5374, "lr": 5.140706343434165e-08, "epoch": 9.81651376146789, "percentage": 98.17, "elapsed_time": "1:37:21", "remaining_time": "0:01:49", "throughput": 1716.52, "total_tokens": 10027248}
|
| 7510 |
+
{"current_steps": 37455, "total_steps": 38150, "loss": 0.2843, "lr": 5.0676586744036194e-08, "epoch": 9.817824377457406, "percentage": 98.18, "elapsed_time": "1:37:22", "remaining_time": "0:01:48", "throughput": 1716.56, "total_tokens": 10028704}
|
| 7511 |
+
{"current_steps": 37460, "total_steps": 38150, "loss": 0.5301, "lr": 4.9951331904007915e-08, "epoch": 9.81913499344692, "percentage": 98.19, "elapsed_time": "1:37:22", "remaining_time": "0:01:47", "throughput": 1716.61, "total_tokens": 10030080}
|
| 7512 |
+
{"current_steps": 37465, "total_steps": 38150, "loss": 0.3898, "lr": 4.923129906606039e-08, "epoch": 9.820445609436435, "percentage": 98.2, "elapsed_time": "1:37:23", "remaining_time": "0:01:46", "throughput": 1716.7, "total_tokens": 10031936}
|
| 7513 |
+
{"current_steps": 37470, "total_steps": 38150, "loss": 0.4562, "lr": 4.851648838088696e-08, "epoch": 9.82175622542595, "percentage": 98.22, "elapsed_time": "1:37:24", "remaining_time": "0:01:46", "throughput": 1716.69, "total_tokens": 10032944}
|
| 7514 |
+
{"current_steps": 37475, "total_steps": 38150, "loss": 0.4719, "lr": 4.7806899998101283e-08, "epoch": 9.823066841415466, "percentage": 98.23, "elapsed_time": "1:37:24", "remaining_time": "0:01:45", "throughput": 1716.71, "total_tokens": 10034128}
|
| 7515 |
+
{"current_steps": 37480, "total_steps": 38150, "loss": 0.4303, "lr": 4.71025340662179e-08, "epoch": 9.82437745740498, "percentage": 98.24, "elapsed_time": "1:37:25", "remaining_time": "0:01:44", "throughput": 1716.71, "total_tokens": 10035216}
|
| 7516 |
+
{"current_steps": 37485, "total_steps": 38150, "loss": 0.4351, "lr": 4.6403390732654985e-08, "epoch": 9.825688073394495, "percentage": 98.26, "elapsed_time": "1:37:26", "remaining_time": "0:01:43", "throughput": 1716.77, "total_tokens": 10036736}
|
| 7517 |
+
{"current_steps": 37490, "total_steps": 38150, "loss": 0.3419, "lr": 4.570947014374827e-08, "epoch": 9.82699868938401, "percentage": 98.27, "elapsed_time": "1:37:27", "remaining_time": "0:01:42", "throughput": 1716.84, "total_tokens": 10038496}
|
| 7518 |
+
{"current_steps": 37495, "total_steps": 38150, "loss": 0.6545, "lr": 4.502077244473435e-08, "epoch": 9.828309305373526, "percentage": 98.28, "elapsed_time": "1:37:27", "remaining_time": "0:01:42", "throughput": 1716.86, "total_tokens": 10039744}
|
| 7519 |
+
{"current_steps": 37500, "total_steps": 38150, "loss": 0.4536, "lr": 4.4337297779750705e-08, "epoch": 9.82961992136304, "percentage": 98.3, "elapsed_time": "1:37:28", "remaining_time": "0:01:41", "throughput": 1716.98, "total_tokens": 10041888}
|
| 7520 |
+
{"current_steps": 37505, "total_steps": 38150, "loss": 0.2949, "lr": 4.365904629185236e-08, "epoch": 9.830930537352556, "percentage": 98.31, "elapsed_time": "1:37:29", "remaining_time": "0:01:40", "throughput": 1717.09, "total_tokens": 10043856}
|
| 7521 |
+
{"current_steps": 37510, "total_steps": 38150, "loss": 0.2457, "lr": 4.298601812299241e-08, "epoch": 9.832241153342071, "percentage": 98.32, "elapsed_time": "1:37:30", "remaining_time": "0:01:39", "throughput": 1717.11, "total_tokens": 10045184}
|
| 7522 |
+
{"current_steps": 37515, "total_steps": 38150, "loss": 0.5211, "lr": 4.2318213414038745e-08, "epoch": 9.833551769331585, "percentage": 98.34, "elapsed_time": "1:37:30", "remaining_time": "0:01:39", "throughput": 1717.13, "total_tokens": 10046416}
|
| 7523 |
+
{"current_steps": 37520, "total_steps": 38150, "loss": 0.4201, "lr": 4.1655632304757334e-08, "epoch": 9.8348623853211, "percentage": 98.35, "elapsed_time": "1:37:31", "remaining_time": "0:01:38", "throughput": 1717.18, "total_tokens": 10047808}
|
| 7524 |
+
{"current_steps": 37525, "total_steps": 38150, "loss": 0.348, "lr": 4.0998274933828905e-08, "epoch": 9.836173001310616, "percentage": 98.36, "elapsed_time": "1:37:32", "remaining_time": "0:01:37", "throughput": 1717.2, "total_tokens": 10049104}
|
| 7525 |
+
{"current_steps": 37530, "total_steps": 38150, "loss": 0.5667, "lr": 4.034614143883508e-08, "epoch": 9.837483617300132, "percentage": 98.37, "elapsed_time": "1:37:32", "remaining_time": "0:01:36", "throughput": 1717.2, "total_tokens": 10050144}
|
| 7526 |
+
{"current_steps": 37535, "total_steps": 38150, "loss": 0.6578, "lr": 3.969923195626668e-08, "epoch": 9.838794233289645, "percentage": 98.39, "elapsed_time": "1:37:33", "remaining_time": "0:01:35", "throughput": 1717.23, "total_tokens": 10051488}
|
| 7527 |
+
{"current_steps": 37540, "total_steps": 38150, "loss": 0.3663, "lr": 3.9057546621520946e-08, "epoch": 9.840104849279161, "percentage": 98.4, "elapsed_time": "1:37:33", "remaining_time": "0:01:35", "throughput": 1717.19, "total_tokens": 10052336}
|
| 7528 |
+
{"current_steps": 37545, "total_steps": 38150, "loss": 0.5609, "lr": 3.842108556890156e-08, "epoch": 9.841415465268676, "percentage": 98.41, "elapsed_time": "1:37:34", "remaining_time": "0:01:34", "throughput": 1717.21, "total_tokens": 10053600}
|
| 7529 |
+
{"current_steps": 37550, "total_steps": 38150, "loss": 0.394, "lr": 3.778984893161863e-08, "epoch": 9.842726081258192, "percentage": 98.43, "elapsed_time": "1:37:35", "remaining_time": "0:01:33", "throughput": 1717.24, "total_tokens": 10054832}
|
| 7530 |
+
{"current_steps": 37555, "total_steps": 38150, "loss": 0.4783, "lr": 3.7163836841791476e-08, "epoch": 9.844036697247706, "percentage": 98.44, "elapsed_time": "1:37:35", "remaining_time": "0:01:32", "throughput": 1717.27, "total_tokens": 10056080}
|
| 7531 |
+
{"current_steps": 37560, "total_steps": 38150, "loss": 0.4629, "lr": 3.654304943043752e-08, "epoch": 9.845347313237221, "percentage": 98.45, "elapsed_time": "1:37:36", "remaining_time": "0:01:31", "throughput": 1717.28, "total_tokens": 10057248}
|
| 7532 |
+
{"current_steps": 37565, "total_steps": 38150, "loss": 0.3988, "lr": 3.5927486827491696e-08, "epoch": 9.846657929226737, "percentage": 98.47, "elapsed_time": "1:37:37", "remaining_time": "0:01:31", "throughput": 1717.32, "total_tokens": 10058528}
|
| 7533 |
+
{"current_steps": 37570, "total_steps": 38150, "loss": 0.8589, "lr": 3.531714916178708e-08, "epoch": 9.847968545216252, "percentage": 98.48, "elapsed_time": "1:37:37", "remaining_time": "0:01:30", "throughput": 1717.34, "total_tokens": 10059792}
|
| 7534 |
+
{"current_steps": 37575, "total_steps": 38150, "loss": 0.5301, "lr": 3.4712036561068693e-08, "epoch": 9.849279161205766, "percentage": 98.49, "elapsed_time": "1:37:38", "remaining_time": "0:01:29", "throughput": 1717.4, "total_tokens": 10061472}
|
| 7535 |
+
{"current_steps": 37580, "total_steps": 38150, "loss": 0.3067, "lr": 3.4112149151982466e-08, "epoch": 9.850589777195282, "percentage": 98.51, "elapsed_time": "1:37:39", "remaining_time": "0:01:28", "throughput": 1717.45, "total_tokens": 10062976}
|
| 7536 |
+
{"current_steps": 37585, "total_steps": 38150, "loss": 0.4163, "lr": 3.351748706008628e-08, "epoch": 9.851900393184797, "percentage": 98.52, "elapsed_time": "1:37:39", "remaining_time": "0:01:28", "throughput": 1717.49, "total_tokens": 10064272}
|
| 7537 |
+
{"current_steps": 37590, "total_steps": 38150, "loss": 0.4554, "lr": 3.292805040984171e-08, "epoch": 9.853211009174313, "percentage": 98.53, "elapsed_time": "1:37:40", "remaining_time": "0:01:27", "throughput": 1717.52, "total_tokens": 10065696}
|
| 7538 |
+
{"current_steps": 37595, "total_steps": 38150, "loss": 0.4883, "lr": 3.234383932461671e-08, "epoch": 9.854521625163827, "percentage": 98.55, "elapsed_time": "1:37:41", "remaining_time": "0:01:26", "throughput": 1717.5, "total_tokens": 10066656}
|
| 7539 |
+
{"current_steps": 37600, "total_steps": 38150, "loss": 0.3162, "lr": 3.176485392668571e-08, "epoch": 9.855832241153342, "percentage": 98.56, "elapsed_time": "1:37:41", "remaining_time": "0:01:25", "throughput": 1717.49, "total_tokens": 10067632}
|
| 7540 |
+
{"current_steps": 37605, "total_steps": 38150, "loss": 0.3895, "lr": 3.119109433722955e-08, "epoch": 9.857142857142858, "percentage": 98.57, "elapsed_time": "1:37:42", "remaining_time": "0:01:24", "throughput": 1717.5, "total_tokens": 10068832}
|
| 7541 |
+
{"current_steps": 37610, "total_steps": 38150, "loss": 0.5132, "lr": 3.0622560676332734e-08, "epoch": 9.858453473132371, "percentage": 98.58, "elapsed_time": "1:37:43", "remaining_time": "0:01:24", "throughput": 1717.54, "total_tokens": 10070224}
|
| 7542 |
+
{"current_steps": 37615, "total_steps": 38150, "loss": 0.4392, "lr": 3.005925306299173e-08, "epoch": 9.859764089121887, "percentage": 98.6, "elapsed_time": "1:37:43", "remaining_time": "0:01:23", "throughput": 1717.62, "total_tokens": 10071952}
|
| 7543 |
+
{"current_steps": 37620, "total_steps": 38150, "loss": 0.5632, "lr": 2.9501171615103907e-08, "epoch": 9.861074705111402, "percentage": 98.61, "elapsed_time": "1:37:44", "remaining_time": "0:01:22", "throughput": 1717.65, "total_tokens": 10073200}
|
| 7544 |
+
{"current_steps": 37625, "total_steps": 38150, "loss": 0.4667, "lr": 2.8948316449473044e-08, "epoch": 9.862385321100918, "percentage": 98.62, "elapsed_time": "1:37:45", "remaining_time": "0:01:21", "throughput": 1717.71, "total_tokens": 10074672}
|
| 7545 |
+
{"current_steps": 37630, "total_steps": 38150, "loss": 0.4994, "lr": 2.840068768181492e-08, "epoch": 9.863695937090432, "percentage": 98.64, "elapsed_time": "1:37:45", "remaining_time": "0:01:21", "throughput": 1717.73, "total_tokens": 10075952}
|
| 7546 |
+
{"current_steps": 37635, "total_steps": 38150, "loss": 0.4288, "lr": 2.785828542674618e-08, "epoch": 9.865006553079947, "percentage": 98.65, "elapsed_time": "1:37:46", "remaining_time": "0:01:20", "throughput": 1717.73, "total_tokens": 10077008}
|
| 7547 |
+
{"current_steps": 37640, "total_steps": 38150, "loss": 0.4825, "lr": 2.7321109797787125e-08, "epoch": 9.866317169069463, "percentage": 98.66, "elapsed_time": "1:37:47", "remaining_time": "0:01:19", "throughput": 1717.75, "total_tokens": 10078224}
|
| 7548 |
+
{"current_steps": 37645, "total_steps": 38150, "loss": 0.5178, "lr": 2.6789160907372822e-08, "epoch": 9.867627785058978, "percentage": 98.68, "elapsed_time": "1:37:47", "remaining_time": "0:01:18", "throughput": 1717.76, "total_tokens": 10079392}
|
| 7549 |
+
{"current_steps": 37650, "total_steps": 38150, "loss": 0.605, "lr": 2.626243886683366e-08, "epoch": 9.868938401048492, "percentage": 98.69, "elapsed_time": "1:37:48", "remaining_time": "0:01:17", "throughput": 1717.81, "total_tokens": 10080752}
|
| 7550 |
+
{"current_steps": 37655, "total_steps": 38150, "loss": 0.4248, "lr": 2.574094378641756e-08, "epoch": 9.870249017038008, "percentage": 98.7, "elapsed_time": "1:37:49", "remaining_time": "0:01:17", "throughput": 1717.83, "total_tokens": 10081968}
|
| 7551 |
+
{"current_steps": 37660, "total_steps": 38150, "loss": 0.4038, "lr": 2.5224675775270544e-08, "epoch": 9.871559633027523, "percentage": 98.72, "elapsed_time": "1:37:49", "remaining_time": "0:01:16", "throughput": 1717.89, "total_tokens": 10083488}
|
| 7552 |
+
{"current_steps": 37665, "total_steps": 38150, "loss": 0.2978, "lr": 2.4713634941442298e-08, "epoch": 9.872870249017039, "percentage": 98.73, "elapsed_time": "1:37:50", "remaining_time": "0:01:15", "throughput": 1717.96, "total_tokens": 10085296}
|
| 7553 |
+
{"current_steps": 37670, "total_steps": 38150, "loss": 0.4928, "lr": 2.4207821391900032e-08, "epoch": 9.874180865006553, "percentage": 98.74, "elapsed_time": "1:37:51", "remaining_time": "0:01:14", "throughput": 1717.99, "total_tokens": 10086544}
|
| 7554 |
+
{"current_steps": 37675, "total_steps": 38150, "loss": 0.4888, "lr": 2.370723523250351e-08, "epoch": 9.875491480996068, "percentage": 98.75, "elapsed_time": "1:37:51", "remaining_time": "0:01:14", "throughput": 1718.01, "total_tokens": 10087824}
|
| 7555 |
+
{"current_steps": 37680, "total_steps": 38150, "loss": 0.5324, "lr": 2.321187656802726e-08, "epoch": 9.876802096985584, "percentage": 98.77, "elapsed_time": "1:37:52", "remaining_time": "0:01:13", "throughput": 1718.05, "total_tokens": 10089248}
|
| 7556 |
+
{"current_steps": 37685, "total_steps": 38150, "loss": 0.7052, "lr": 2.272174550214945e-08, "epoch": 9.8781127129751, "percentage": 98.78, "elapsed_time": "1:37:53", "remaining_time": "0:01:12", "throughput": 1718.07, "total_tokens": 10090400}
|
| 7557 |
+
{"current_steps": 37690, "total_steps": 38150, "loss": 0.5775, "lr": 2.223684213745192e-08, "epoch": 9.879423328964613, "percentage": 98.79, "elapsed_time": "1:37:53", "remaining_time": "0:01:11", "throughput": 1718.06, "total_tokens": 10091440}
|
| 7558 |
+
{"current_steps": 37695, "total_steps": 38150, "loss": 0.4851, "lr": 2.1757166575425702e-08, "epoch": 9.880733944954128, "percentage": 98.81, "elapsed_time": "1:37:54", "remaining_time": "0:01:10", "throughput": 1718.1, "total_tokens": 10092784}
|
| 7559 |
+
{"current_steps": 37700, "total_steps": 38150, "loss": 0.4529, "lr": 2.1282718916465494e-08, "epoch": 9.882044560943644, "percentage": 98.82, "elapsed_time": "1:37:55", "remaining_time": "0:01:10", "throughput": 1718.17, "total_tokens": 10094352}
|
| 7560 |
+
{"current_steps": 37705, "total_steps": 38150, "loss": 0.4666, "lr": 2.0813499259872414e-08, "epoch": 9.883355176933158, "percentage": 98.83, "elapsed_time": "1:37:55", "remaining_time": "0:01:09", "throughput": 1718.18, "total_tokens": 10095488}
|
| 7561 |
+
{"current_steps": 37710, "total_steps": 38150, "loss": 0.5032, "lr": 2.0349507703851244e-08, "epoch": 9.884665792922673, "percentage": 98.85, "elapsed_time": "1:37:56", "remaining_time": "0:01:08", "throughput": 1718.17, "total_tokens": 10096496}
|
| 7562 |
+
{"current_steps": 37715, "total_steps": 38150, "loss": 0.4291, "lr": 1.9890744345518742e-08, "epoch": 9.885976408912189, "percentage": 98.86, "elapsed_time": "1:37:57", "remaining_time": "0:01:07", "throughput": 1718.24, "total_tokens": 10098080}
|
| 7563 |
+
{"current_steps": 37720, "total_steps": 38150, "loss": 0.3265, "lr": 1.9437209280889768e-08, "epoch": 9.887287024901704, "percentage": 98.87, "elapsed_time": "1:37:57", "remaining_time": "0:01:07", "throughput": 1718.31, "total_tokens": 10099808}
|
| 7564 |
+
{"current_steps": 37725, "total_steps": 38150, "loss": 0.5593, "lr": 1.8988902604891166e-08, "epoch": 9.888597640891218, "percentage": 98.89, "elapsed_time": "1:37:58", "remaining_time": "0:01:06", "throughput": 1718.28, "total_tokens": 10100736}
|
| 7565 |
+
{"current_steps": 37730, "total_steps": 38150, "loss": 0.4665, "lr": 1.8545824411350665e-08, "epoch": 9.889908256880734, "percentage": 98.9, "elapsed_time": "1:37:59", "remaining_time": "0:01:05", "throughput": 1718.29, "total_tokens": 10101856}
|
| 7566 |
+
{"current_steps": 37735, "total_steps": 38150, "loss": 0.5775, "lr": 1.810797479300519e-08, "epoch": 9.89121887287025, "percentage": 98.91, "elapsed_time": "1:37:59", "remaining_time": "0:01:04", "throughput": 1718.3, "total_tokens": 10102944}
|
| 7567 |
+
{"current_steps": 37740, "total_steps": 38150, "loss": 0.4107, "lr": 1.7675353841495325e-08, "epoch": 9.892529488859765, "percentage": 98.93, "elapsed_time": "1:38:00", "remaining_time": "0:01:03", "throughput": 1718.41, "total_tokens": 10105040}
|
| 7568 |
+
{"current_steps": 37745, "total_steps": 38150, "loss": 0.5416, "lr": 1.7247961647368082e-08, "epoch": 9.893840104849279, "percentage": 98.94, "elapsed_time": "1:38:01", "remaining_time": "0:01:03", "throughput": 1718.46, "total_tokens": 10106576}
|
| 7569 |
+
{"current_steps": 37750, "total_steps": 38150, "loss": 0.4093, "lr": 1.6825798300074137e-08, "epoch": 9.895150720838794, "percentage": 98.95, "elapsed_time": "1:38:01", "remaining_time": "0:01:02", "throughput": 1718.5, "total_tokens": 10108000}
|
| 7570 |
+
{"current_steps": 37755, "total_steps": 38150, "loss": 0.3691, "lr": 1.640886388797336e-08, "epoch": 9.89646133682831, "percentage": 98.96, "elapsed_time": "1:38:02", "remaining_time": "0:01:01", "throughput": 1718.59, "total_tokens": 10109776}
|
| 7571 |
+
{"current_steps": 37760, "total_steps": 38150, "loss": 0.5016, "lr": 1.5997158498329277e-08, "epoch": 9.897771952817825, "percentage": 98.98, "elapsed_time": "1:38:03", "remaining_time": "0:01:00", "throughput": 1718.64, "total_tokens": 10111344}
|
| 7572 |
+
{"current_steps": 37765, "total_steps": 38150, "loss": 0.4656, "lr": 1.559068221731186e-08, "epoch": 9.899082568807339, "percentage": 98.99, "elapsed_time": "1:38:04", "remaining_time": "0:00:59", "throughput": 1718.7, "total_tokens": 10112816}
|
| 7573 |
+
{"current_steps": 37770, "total_steps": 38150, "loss": 0.4262, "lr": 1.518943512999471e-08, "epoch": 9.900393184796854, "percentage": 99.0, "elapsed_time": "1:38:04", "remaining_time": "0:00:59", "throughput": 1718.73, "total_tokens": 10114112}
|
| 7574 |
+
{"current_steps": 37775, "total_steps": 38150, "loss": 0.5565, "lr": 1.4793417320357882e-08, "epoch": 9.90170380078637, "percentage": 99.02, "elapsed_time": "1:38:05", "remaining_time": "0:00:58", "throughput": 1718.78, "total_tokens": 10115584}
|
| 7575 |
+
{"current_steps": 37780, "total_steps": 38150, "loss": 0.4864, "lr": 1.4402628871285073e-08, "epoch": 9.903014416775886, "percentage": 99.03, "elapsed_time": "1:38:06", "remaining_time": "0:00:57", "throughput": 1718.89, "total_tokens": 10117760}
|
| 7576 |
+
{"current_steps": 37785, "total_steps": 38150, "loss": 0.5071, "lr": 1.401706986457474e-08, "epoch": 9.9043250327654, "percentage": 99.04, "elapsed_time": "1:38:06", "remaining_time": "0:00:56", "throughput": 1718.88, "total_tokens": 10118720}
|
| 7577 |
+
{"current_steps": 37790, "total_steps": 38150, "loss": 0.6881, "lr": 1.3636740380915113e-08, "epoch": 9.905635648754915, "percentage": 99.06, "elapsed_time": "1:38:07", "remaining_time": "0:00:56", "throughput": 1718.92, "total_tokens": 10120128}
|
| 7578 |
+
{"current_steps": 37795, "total_steps": 38150, "loss": 0.4767, "lr": 1.326164049991474e-08, "epoch": 9.90694626474443, "percentage": 99.07, "elapsed_time": "1:38:08", "remaining_time": "0:00:55", "throughput": 1719.02, "total_tokens": 10122112}
|
| 7579 |
+
{"current_steps": 37800, "total_steps": 38150, "loss": 0.5301, "lr": 1.2891770300080263e-08, "epoch": 9.908256880733944, "percentage": 99.08, "elapsed_time": "1:38:08", "remaining_time": "0:00:54", "throughput": 1719.06, "total_tokens": 10123536}
|
| 7580 |
+
{"current_steps": 37805, "total_steps": 38150, "loss": 0.4071, "lr": 1.2527129858821985e-08, "epoch": 9.90956749672346, "percentage": 99.1, "elapsed_time": "1:38:09", "remaining_time": "0:00:53", "throughput": 1719.12, "total_tokens": 10125088}
|
| 7581 |
+
{"current_steps": 37810, "total_steps": 38150, "loss": 0.5652, "lr": 1.2167719252462183e-08, "epoch": 9.910878112712975, "percentage": 99.11, "elapsed_time": "1:38:10", "remaining_time": "0:00:52", "throughput": 1719.17, "total_tokens": 10126512}
|
| 7582 |
+
{"current_steps": 37815, "total_steps": 38150, "loss": 0.6393, "lr": 1.1813538556221249e-08, "epoch": 9.91218872870249, "percentage": 99.12, "elapsed_time": "1:38:10", "remaining_time": "0:00:52", "throughput": 1719.16, "total_tokens": 10127488}
|
| 7583 |
+
{"current_steps": 37820, "total_steps": 38150, "loss": 0.4564, "lr": 1.1464587844231544e-08, "epoch": 9.913499344692005, "percentage": 99.13, "elapsed_time": "1:38:11", "remaining_time": "0:00:51", "throughput": 1719.18, "total_tokens": 10128704}
|
| 7584 |
+
{"current_steps": 37825, "total_steps": 38150, "loss": 0.5635, "lr": 1.1120867189526318e-08, "epoch": 9.91480996068152, "percentage": 99.15, "elapsed_time": "1:38:12", "remaining_time": "0:00:50", "throughput": 1719.19, "total_tokens": 10129904}
|
| 7585 |
+
{"current_steps": 37830, "total_steps": 38150, "loss": 0.4425, "lr": 1.0782376664045246e-08, "epoch": 9.916120576671036, "percentage": 99.16, "elapsed_time": "1:38:12", "remaining_time": "0:00:49", "throughput": 1719.23, "total_tokens": 10131232}
|
| 7586 |
+
{"current_steps": 37835, "total_steps": 38150, "loss": 0.3406, "lr": 1.044911633863721e-08, "epoch": 9.917431192660551, "percentage": 99.17, "elapsed_time": "1:38:13", "remaining_time": "0:00:49", "throughput": 1719.27, "total_tokens": 10132640}
|
| 7587 |
+
{"current_steps": 37840, "total_steps": 38150, "loss": 0.5424, "lr": 1.0121086283049198e-08, "epoch": 9.918741808650065, "percentage": 99.19, "elapsed_time": "1:38:14", "remaining_time": "0:00:48", "throughput": 1719.26, "total_tokens": 10133728}
|
| 7588 |
+
{"current_steps": 37845, "total_steps": 38150, "loss": 0.4494, "lr": 9.798286565937399e-09, "epoch": 9.92005242463958, "percentage": 99.2, "elapsed_time": "1:38:14", "remaining_time": "0:00:47", "throughput": 1719.29, "total_tokens": 10134992}
|
| 7589 |
+
{"current_steps": 37850, "total_steps": 38150, "loss": 0.3853, "lr": 9.48071725486721e-09, "epoch": 9.921363040629096, "percentage": 99.21, "elapsed_time": "1:38:15", "remaining_time": "0:00:46", "throughput": 1719.28, "total_tokens": 10136000}
|
| 7590 |
+
{"current_steps": 37855, "total_steps": 38150, "loss": 0.5436, "lr": 9.168378416299362e-09, "epoch": 9.922673656618612, "percentage": 99.23, "elapsed_time": "1:38:16", "remaining_time": "0:00:45", "throughput": 1719.27, "total_tokens": 10137008}
|
| 7591 |
+
{"current_steps": 37860, "total_steps": 38150, "loss": 0.3707, "lr": 8.861270115612108e-09, "epoch": 9.923984272608125, "percentage": 99.24, "elapsed_time": "1:38:16", "remaining_time": "0:00:45", "throughput": 1719.35, "total_tokens": 10138576}
|
| 7592 |
+
{"current_steps": 37865, "total_steps": 38150, "loss": 0.3039, "lr": 8.559392417079038e-09, "epoch": 9.92529488859764, "percentage": 99.25, "elapsed_time": "1:38:17", "remaining_time": "0:00:44", "throughput": 1719.36, "total_tokens": 10139728}
|
| 7593 |
+
{"current_steps": 37870, "total_steps": 38150, "loss": 0.62, "lr": 8.262745383880166e-09, "epoch": 9.926605504587156, "percentage": 99.27, "elapsed_time": "1:38:18", "remaining_time": "0:00:43", "throughput": 1719.42, "total_tokens": 10141328}
|
| 7594 |
+
{"current_steps": 37875, "total_steps": 38150, "loss": 0.4175, "lr": 7.971329078110268e-09, "epoch": 9.927916120576672, "percentage": 99.28, "elapsed_time": "1:38:18", "remaining_time": "0:00:42", "throughput": 1719.43, "total_tokens": 10142432}
|
| 7595 |
+
{"current_steps": 37880, "total_steps": 38150, "loss": 0.5269, "lr": 7.68514356075667e-09, "epoch": 9.929226736566186, "percentage": 99.29, "elapsed_time": "1:38:19", "remaining_time": "0:00:42", "throughput": 1719.48, "total_tokens": 10143936}
|
| 7596 |
+
{"current_steps": 37885, "total_steps": 38150, "loss": 0.3476, "lr": 7.4041888917186775e-09, "epoch": 9.930537352555701, "percentage": 99.31, "elapsed_time": "1:38:20", "remaining_time": "0:00:41", "throughput": 1719.51, "total_tokens": 10145152}
|
| 7597 |
+
{"current_steps": 37890, "total_steps": 38150, "loss": 0.522, "lr": 7.1284651297992556e-09, "epoch": 9.931847968545217, "percentage": 99.32, "elapsed_time": "1:38:20", "remaining_time": "0:00:40", "throughput": 1719.55, "total_tokens": 10146720}
|
| 7598 |
+
{"current_steps": 37895, "total_steps": 38150, "loss": 0.4965, "lr": 6.8579723327105715e-09, "epoch": 9.93315858453473, "percentage": 99.33, "elapsed_time": "1:38:21", "remaining_time": "0:00:39", "throughput": 1719.55, "total_tokens": 10147776}
|
| 7599 |
+
{"current_steps": 37900, "total_steps": 38150, "loss": 0.5095, "lr": 6.5927105570601224e-09, "epoch": 9.934469200524246, "percentage": 99.34, "elapsed_time": "1:38:22", "remaining_time": "0:00:38", "throughput": 1719.55, "total_tokens": 10148848}
|
| 7600 |
+
{"current_steps": 37905, "total_steps": 38150, "loss": 0.5331, "lr": 6.3326798583729364e-09, "epoch": 9.935779816513762, "percentage": 99.36, "elapsed_time": "1:38:22", "remaining_time": "0:00:38", "throughput": 1719.54, "total_tokens": 10149872}
|
| 7601 |
+
{"current_steps": 37910, "total_steps": 38150, "loss": 0.5014, "lr": 6.077880291069371e-09, "epoch": 9.937090432503277, "percentage": 99.37, "elapsed_time": "1:38:23", "remaining_time": "0:00:37", "throughput": 1719.57, "total_tokens": 10151088}
|
| 7602 |
+
{"current_steps": 37915, "total_steps": 38150, "loss": 0.5018, "lr": 5.8283119084789895e-09, "epoch": 9.938401048492791, "percentage": 99.38, "elapsed_time": "1:38:23", "remaining_time": "0:00:36", "throughput": 1719.6, "total_tokens": 10152384}
|
| 7603 |
+
{"current_steps": 37920, "total_steps": 38150, "loss": 0.5274, "lr": 5.583974762835009e-09, "epoch": 9.939711664482306, "percentage": 99.4, "elapsed_time": "1:38:24", "remaining_time": "0:00:35", "throughput": 1719.63, "total_tokens": 10153728}
|
| 7604 |
+
{"current_steps": 37925, "total_steps": 38150, "loss": 0.4422, "lr": 5.344868905279854e-09, "epoch": 9.941022280471822, "percentage": 99.41, "elapsed_time": "1:38:25", "remaining_time": "0:00:35", "throughput": 1719.67, "total_tokens": 10155152}
|
| 7605 |
+
{"current_steps": 37930, "total_steps": 38150, "loss": 0.5795, "lr": 5.110994385856826e-09, "epoch": 9.942332896461338, "percentage": 99.42, "elapsed_time": "1:38:25", "remaining_time": "0:00:34", "throughput": 1719.71, "total_tokens": 10156560}
|
| 7606 |
+
{"current_steps": 37935, "total_steps": 38150, "loss": 0.3342, "lr": 4.8823512535128845e-09, "epoch": 9.943643512450851, "percentage": 99.44, "elapsed_time": "1:38:26", "remaining_time": "0:00:33", "throughput": 1719.78, "total_tokens": 10158288}
|
| 7607 |
+
{"current_steps": 37940, "total_steps": 38150, "loss": 0.3313, "lr": 4.658939556104191e-09, "epoch": 9.944954128440367, "percentage": 99.45, "elapsed_time": "1:38:27", "remaining_time": "0:00:32", "throughput": 1719.83, "total_tokens": 10159664}
|
| 7608 |
+
{"current_steps": 37945, "total_steps": 38150, "loss": 0.5568, "lr": 4.440759340393341e-09, "epoch": 9.946264744429882, "percentage": 99.46, "elapsed_time": "1:38:27", "remaining_time": "0:00:31", "throughput": 1719.83, "total_tokens": 10160720}
|
| 7609 |
+
{"current_steps": 37950, "total_steps": 38150, "loss": 0.5958, "lr": 4.227810652041031e-09, "epoch": 9.947575360419398, "percentage": 99.48, "elapsed_time": "1:38:28", "remaining_time": "0:00:31", "throughput": 1719.86, "total_tokens": 10161984}
|
| 7610 |
+
{"current_steps": 37955, "total_steps": 38150, "loss": 0.4652, "lr": 4.0200935356171645e-09, "epoch": 9.948885976408912, "percentage": 99.49, "elapsed_time": "1:38:29", "remaining_time": "0:00:30", "throughput": 1719.87, "total_tokens": 10163232}
|
| 7611 |
+
{"current_steps": 37960, "total_steps": 38150, "loss": 0.5117, "lr": 3.817608034600851e-09, "epoch": 9.950196592398427, "percentage": 99.5, "elapsed_time": "1:38:29", "remaining_time": "0:00:29", "throughput": 1719.87, "total_tokens": 10164272}
|
| 7612 |
+
{"current_steps": 37965, "total_steps": 38150, "loss": 0.4646, "lr": 3.620354191366526e-09, "epoch": 9.951507208387943, "percentage": 99.52, "elapsed_time": "1:38:30", "remaining_time": "0:00:28", "throughput": 1719.87, "total_tokens": 10165328}
|
| 7613 |
+
{"current_steps": 37970, "total_steps": 38150, "loss": 0.5442, "lr": 3.4283320472033864e-09, "epoch": 9.952817824377458, "percentage": 99.53, "elapsed_time": "1:38:31", "remaining_time": "0:00:28", "throughput": 1719.86, "total_tokens": 10166352}
|
| 7614 |
+
{"current_steps": 37975, "total_steps": 38150, "loss": 0.6032, "lr": 3.241541642298729e-09, "epoch": 9.954128440366972, "percentage": 99.54, "elapsed_time": "1:38:31", "remaining_time": "0:00:27", "throughput": 1719.92, "total_tokens": 10168000}
|
| 7615 |
+
{"current_steps": 37980, "total_steps": 38150, "loss": 0.3812, "lr": 3.059983015749057e-09, "epoch": 9.955439056356488, "percentage": 99.55, "elapsed_time": "1:38:32", "remaining_time": "0:00:26", "throughput": 1719.93, "total_tokens": 10169088}
|
| 7616 |
+
{"current_steps": 37985, "total_steps": 38150, "loss": 0.4265, "lr": 2.88365620555453e-09, "epoch": 9.956749672346003, "percentage": 99.57, "elapsed_time": "1:38:33", "remaining_time": "0:00:25", "throughput": 1720.06, "total_tokens": 10171456}
|
| 7617 |
+
{"current_steps": 37990, "total_steps": 38150, "loss": 0.5511, "lr": 2.712561248618961e-09, "epoch": 9.958060288335517, "percentage": 99.58, "elapsed_time": "1:38:34", "remaining_time": "0:00:24", "throughput": 1720.08, "total_tokens": 10172672}
|
| 7618 |
+
{"current_steps": 37995, "total_steps": 38150, "loss": 0.6493, "lr": 2.546698180749818e-09, "epoch": 9.959370904325032, "percentage": 99.59, "elapsed_time": "1:38:34", "remaining_time": "0:00:24", "throughput": 1720.11, "total_tokens": 10173888}
|
| 7619 |
+
{"current_steps": 38000, "total_steps": 38150, "loss": 0.4543, "lr": 2.3860670366665505e-09, "epoch": 9.960681520314548, "percentage": 99.61, "elapsed_time": "1:38:35", "remaining_time": "0:00:23", "throughput": 1720.17, "total_tokens": 10175392}
|
| 7620 |
+
{"current_steps": 38005, "total_steps": 38150, "loss": 0.4227, "lr": 2.2306678499867116e-09, "epoch": 9.961992136304064, "percentage": 99.62, "elapsed_time": "1:38:35", "remaining_time": "0:00:22", "throughput": 1720.18, "total_tokens": 10176576}
|
| 7621 |
+
{"current_steps": 38010, "total_steps": 38150, "loss": 0.366, "lr": 2.080500653234285e-09, "epoch": 9.963302752293577, "percentage": 99.63, "elapsed_time": "1:38:36", "remaining_time": "0:00:21", "throughput": 1720.19, "total_tokens": 10177664}
|
| 7622 |
+
{"current_steps": 38015, "total_steps": 38150, "loss": 0.3758, "lr": 1.935565477839685e-09, "epoch": 9.964613368283093, "percentage": 99.65, "elapsed_time": "1:38:37", "remaining_time": "0:00:21", "throughput": 1720.24, "total_tokens": 10179152}
|
| 7623 |
+
{"current_steps": 38020, "total_steps": 38150, "loss": 0.5286, "lr": 1.7958623541397546e-09, "epoch": 9.965923984272608, "percentage": 99.66, "elapsed_time": "1:38:37", "remaining_time": "0:00:20", "throughput": 1720.22, "total_tokens": 10180096}
|
| 7624 |
+
{"current_steps": 38025, "total_steps": 38150, "loss": 0.5744, "lr": 1.6613913113694424e-09, "epoch": 9.967234600262124, "percentage": 99.67, "elapsed_time": "1:38:38", "remaining_time": "0:00:19", "throughput": 1720.25, "total_tokens": 10181456}
|
| 7625 |
+
{"current_steps": 38030, "total_steps": 38150, "loss": 0.6039, "lr": 1.532152377678453e-09, "epoch": 9.968545216251638, "percentage": 99.69, "elapsed_time": "1:38:39", "remaining_time": "0:00:18", "throughput": 1720.27, "total_tokens": 10182752}
|
| 7626 |
+
{"current_steps": 38035, "total_steps": 38150, "loss": 0.4691, "lr": 1.4081455801145948e-09, "epoch": 9.969855832241153, "percentage": 99.7, "elapsed_time": "1:38:39", "remaining_time": "0:00:17", "throughput": 1720.31, "total_tokens": 10184032}
|
| 7627 |
+
{"current_steps": 38040, "total_steps": 38150, "loss": 0.4383, "lr": 1.289370944629331e-09, "epoch": 9.971166448230669, "percentage": 99.71, "elapsed_time": "1:38:40", "remaining_time": "0:00:17", "throughput": 1720.42, "total_tokens": 10186016}
|
| 7628 |
+
{"current_steps": 38045, "total_steps": 38150, "loss": 0.5702, "lr": 1.1758284960861066e-09, "epoch": 9.972477064220184, "percentage": 99.72, "elapsed_time": "1:38:41", "remaining_time": "0:00:16", "throughput": 1720.49, "total_tokens": 10187632}
|
| 7629 |
+
{"current_steps": 38050, "total_steps": 38150, "loss": 0.4261, "lr": 1.0675182582464693e-09, "epoch": 9.973787680209698, "percentage": 99.74, "elapsed_time": "1:38:41", "remaining_time": "0:00:15", "throughput": 1720.49, "total_tokens": 10188736}
|
| 7630 |
+
{"current_steps": 38055, "total_steps": 38150, "loss": 0.4982, "lr": 9.644402537811736e-10, "epoch": 9.975098296199214, "percentage": 99.75, "elapsed_time": "1:38:42", "remaining_time": "0:00:14", "throughput": 1720.55, "total_tokens": 10190368}
|
| 7631 |
+
{"current_steps": 38060, "total_steps": 38150, "loss": 0.4371, "lr": 8.665945042618529e-10, "epoch": 9.97640891218873, "percentage": 99.76, "elapsed_time": "1:38:43", "remaining_time": "0:00:14", "throughput": 1720.56, "total_tokens": 10191504}
|
| 7632 |
+
{"current_steps": 38065, "total_steps": 38150, "loss": 0.471, "lr": 7.739810301693462e-10, "epoch": 9.977719528178245, "percentage": 99.78, "elapsed_time": "1:38:44", "remaining_time": "0:00:13", "throughput": 1720.58, "total_tokens": 10192816}
|
| 7633 |
+
{"current_steps": 38070, "total_steps": 38150, "loss": 0.5922, "lr": 6.865998508881477e-10, "epoch": 9.979030144167758, "percentage": 99.79, "elapsed_time": "1:38:44", "remaining_time": "0:00:12", "throughput": 1720.57, "total_tokens": 10193808}
|
| 7634 |
+
{"current_steps": 38075, "total_steps": 38150, "loss": 0.666, "lr": 6.044509847064062e-10, "epoch": 9.980340760157274, "percentage": 99.8, "elapsed_time": "1:38:45", "remaining_time": "0:00:11", "throughput": 1720.55, "total_tokens": 10194784}
|
| 7635 |
+
{"current_steps": 38080, "total_steps": 38150, "loss": 0.5533, "lr": 5.275344488187006e-10, "epoch": 9.98165137614679, "percentage": 99.82, "elapsed_time": "1:38:45", "remaining_time": "0:00:10", "throughput": 1720.57, "total_tokens": 10195936}
|
| 7636 |
+
{"current_steps": 38085, "total_steps": 38150, "loss": 0.4056, "lr": 4.5585025932048943e-10, "epoch": 9.982961992136303, "percentage": 99.83, "elapsed_time": "1:38:46", "remaining_time": "0:00:10", "throughput": 1720.61, "total_tokens": 10197312}
|
| 7637 |
+
{"current_steps": 38090, "total_steps": 38150, "loss": 0.5213, "lr": 3.893984312164367e-10, "epoch": 9.984272608125819, "percentage": 99.84, "elapsed_time": "1:38:47", "remaining_time": "0:00:09", "throughput": 1720.64, "total_tokens": 10198608}
|
| 7638 |
+
{"current_steps": 38095, "total_steps": 38150, "loss": 0.6512, "lr": 3.28178978417637e-10, "epoch": 9.985583224115334, "percentage": 99.86, "elapsed_time": "1:38:47", "remaining_time": "0:00:08", "throughput": 1720.69, "total_tokens": 10200032}
|
| 7639 |
+
{"current_steps": 38100, "total_steps": 38150, "loss": 0.4037, "lr": 2.7219191373328846e-10, "epoch": 9.98689384010485, "percentage": 99.87, "elapsed_time": "1:38:48", "remaining_time": "0:00:07", "throughput": 1720.76, "total_tokens": 10201680}
|
| 7640 |
+
{"current_steps": 38105, "total_steps": 38150, "loss": 0.4281, "lr": 2.2143724888179506e-10, "epoch": 9.988204456094364, "percentage": 99.88, "elapsed_time": "1:38:49", "remaining_time": "0:00:07", "throughput": 1720.8, "total_tokens": 10203056}
|
| 7641 |
+
{"current_steps": 38110, "total_steps": 38150, "loss": 0.4263, "lr": 1.759149944879912e-10, "epoch": 9.98951507208388, "percentage": 99.9, "elapsed_time": "1:38:49", "remaining_time": "0:00:06", "throughput": 1720.79, "total_tokens": 10204032}
|
| 7642 |
+
{"current_steps": 38115, "total_steps": 38150, "loss": 0.3833, "lr": 1.3562516008036597e-10, "epoch": 9.990825688073395, "percentage": 99.91, "elapsed_time": "1:38:50", "remaining_time": "0:00:05", "throughput": 1720.84, "total_tokens": 10205488}
|
| 7643 |
+
{"current_steps": 38120, "total_steps": 38150, "loss": 0.4283, "lr": 1.0056775408828767e-10, "epoch": 9.99213630406291, "percentage": 99.92, "elapsed_time": "1:38:51", "remaining_time": "0:00:04", "throughput": 1720.84, "total_tokens": 10206608}
|
| 7644 |
+
{"current_steps": 38125, "total_steps": 38150, "loss": 0.6102, "lr": 7.07427838503305e-11, "epoch": 9.993446920052424, "percentage": 99.93, "elapsed_time": "1:38:51", "remaining_time": "0:00:03", "throughput": 1720.86, "total_tokens": 10207808}
|
| 7645 |
+
{"current_steps": 38130, "total_steps": 38150, "loss": 0.4566, "lr": 4.6150255611498996e-11, "epoch": 9.99475753604194, "percentage": 99.95, "elapsed_time": "1:38:52", "remaining_time": "0:00:03", "throughput": 1720.88, "total_tokens": 10208976}
|
| 7646 |
+
{"current_steps": 38135, "total_steps": 38150, "loss": 0.4748, "lr": 2.679017451490129e-11, "epoch": 9.996068152031455, "percentage": 99.96, "elapsed_time": "1:38:53", "remaining_time": "0:00:02", "throughput": 1720.9, "total_tokens": 10210128}
|
| 7647 |
+
{"current_steps": 38140, "total_steps": 38150, "loss": 0.4218, "lr": 1.2662544615626993e-11, "epoch": 9.997378768020969, "percentage": 99.97, "elapsed_time": "1:38:53", "remaining_time": "0:00:01", "throughput": 1720.94, "total_tokens": 10211488}
|
| 7648 |
+
{"current_steps": 38145, "total_steps": 38150, "loss": 0.517, "lr": 3.767368869644905e-12, "epoch": 9.998689384010484, "percentage": 99.99, "elapsed_time": "1:38:54", "remaining_time": "0:00:00", "throughput": 1721.01, "total_tokens": 10213040}
|
| 7649 |
+
{"current_steps": 38150, "total_steps": 38150, "loss": 0.4409, "lr": 1.0464913657859399e-13, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:38:54", "remaining_time": "0:00:00", "throughput": 1721.0, "total_tokens": 10214104}
|
| 7650 |
+
{"current_steps": 38150, "total_steps": 38150, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:39:09", "remaining_time": "0:00:00", "throughput": 1716.93, "total_tokens": 10214104}
|