Training in progress, step 34632
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +386 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 798032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c22fd255c6b57f6b4d6cf0e0558d759850872d60ac68b2c2b5dc991239e3ce0c
|
| 3 |
size 798032
|
trainer_log.jsonl
CHANGED
|
@@ -6559,3 +6559,389 @@
|
|
| 6559 |
{"current_steps": 32710, "total_steps": 38480, "loss": 0.1168, "lr": 3.3482310747393e-06, "epoch": 17.0010395010395, "percentage": 85.01, "elapsed_time": "1:18:50", "remaining_time": "0:13:54", "throughput": 1318.21, "total_tokens": 6235336}
|
| 6560 |
{"current_steps": 32715, "total_steps": 38480, "loss": 0.1974, "lr": 3.3425645942481126e-06, "epoch": 17.003638253638254, "percentage": 85.02, "elapsed_time": "1:18:50", "remaining_time": "0:13:53", "throughput": 1318.17, "total_tokens": 6236264}
|
| 6561 |
{"current_steps": 32720, "total_steps": 38480, "loss": 0.0464, "lr": 3.336902569207484e-06, "epoch": 17.006237006237008, "percentage": 85.03, "elapsed_time": "1:18:51", "remaining_time": "0:13:52", "throughput": 1318.18, "total_tokens": 6237224}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6559 |
{"current_steps": 32710, "total_steps": 38480, "loss": 0.1168, "lr": 3.3482310747393e-06, "epoch": 17.0010395010395, "percentage": 85.01, "elapsed_time": "1:18:50", "remaining_time": "0:13:54", "throughput": 1318.21, "total_tokens": 6235336}
|
| 6560 |
{"current_steps": 32715, "total_steps": 38480, "loss": 0.1974, "lr": 3.3425645942481126e-06, "epoch": 17.003638253638254, "percentage": 85.02, "elapsed_time": "1:18:50", "remaining_time": "0:13:53", "throughput": 1318.17, "total_tokens": 6236264}
|
| 6561 |
{"current_steps": 32720, "total_steps": 38480, "loss": 0.0464, "lr": 3.336902569207484e-06, "epoch": 17.006237006237008, "percentage": 85.03, "elapsed_time": "1:18:51", "remaining_time": "0:13:52", "throughput": 1318.18, "total_tokens": 6237224}
|
| 6562 |
+
{"current_steps": 32725, "total_steps": 38480, "loss": 0.1657, "lr": 3.331245000782221e-06, "epoch": 17.008835758835758, "percentage": 85.04, "elapsed_time": "1:18:52", "remaining_time": "0:13:52", "throughput": 1318.18, "total_tokens": 6238184}
|
| 6563 |
+
{"current_steps": 32730, "total_steps": 38480, "loss": 0.1454, "lr": 3.3255918901362342e-06, "epoch": 17.011434511434512, "percentage": 85.06, "elapsed_time": "1:18:53", "remaining_time": "0:13:51", "throughput": 1318.19, "total_tokens": 6239144}
|
| 6564 |
+
{"current_steps": 32735, "total_steps": 38480, "loss": 0.1595, "lr": 3.3199432384324964e-06, "epoch": 17.014033264033262, "percentage": 85.07, "elapsed_time": "1:18:53", "remaining_time": "0:13:50", "throughput": 1318.19, "total_tokens": 6240104}
|
| 6565 |
+
{"current_steps": 32740, "total_steps": 38480, "loss": 0.1991, "lr": 3.3142990468330636e-06, "epoch": 17.016632016632016, "percentage": 85.08, "elapsed_time": "1:18:54", "remaining_time": "0:13:50", "throughput": 1318.19, "total_tokens": 6241000}
|
| 6566 |
+
{"current_steps": 32745, "total_steps": 38480, "loss": 0.0993, "lr": 3.308659316499094e-06, "epoch": 17.01923076923077, "percentage": 85.1, "elapsed_time": "1:18:55", "remaining_time": "0:13:49", "throughput": 1318.19, "total_tokens": 6241928}
|
| 6567 |
+
{"current_steps": 32750, "total_steps": 38480, "loss": 0.1135, "lr": 3.3030240485908025e-06, "epoch": 17.02182952182952, "percentage": 85.11, "elapsed_time": "1:18:55", "remaining_time": "0:13:48", "throughput": 1318.21, "total_tokens": 6242952}
|
| 6568 |
+
{"current_steps": 32755, "total_steps": 38480, "loss": 0.1664, "lr": 3.297393244267505e-06, "epoch": 17.024428274428274, "percentage": 85.12, "elapsed_time": "1:18:56", "remaining_time": "0:13:47", "throughput": 1318.22, "total_tokens": 6243912}
|
| 6569 |
+
{"current_steps": 32760, "total_steps": 38480, "loss": 0.1327, "lr": 3.2917669046875847e-06, "epoch": 17.027027027027028, "percentage": 85.14, "elapsed_time": "1:18:57", "remaining_time": "0:13:47", "throughput": 1318.22, "total_tokens": 6244872}
|
| 6570 |
+
{"current_steps": 32765, "total_steps": 38480, "loss": 0.1171, "lr": 3.286145031008522e-06, "epoch": 17.02962577962578, "percentage": 85.15, "elapsed_time": "1:18:58", "remaining_time": "0:13:46", "throughput": 1318.22, "total_tokens": 6245800}
|
| 6571 |
+
{"current_steps": 32770, "total_steps": 38480, "loss": 0.1435, "lr": 3.28052762438687e-06, "epoch": 17.032224532224532, "percentage": 85.16, "elapsed_time": "1:18:58", "remaining_time": "0:13:45", "throughput": 1318.24, "total_tokens": 6246824}
|
| 6572 |
+
{"current_steps": 32775, "total_steps": 38480, "loss": 0.0954, "lr": 3.2749146859782587e-06, "epoch": 17.034823284823286, "percentage": 85.17, "elapsed_time": "1:18:59", "remaining_time": "0:13:44", "throughput": 1318.26, "total_tokens": 6247816}
|
| 6573 |
+
{"current_steps": 32780, "total_steps": 38480, "loss": 0.1392, "lr": 3.269306216937404e-06, "epoch": 17.037422037422036, "percentage": 85.19, "elapsed_time": "1:19:00", "remaining_time": "0:13:44", "throughput": 1318.27, "total_tokens": 6248776}
|
| 6574 |
+
{"current_steps": 32785, "total_steps": 38480, "loss": 0.1601, "lr": 3.2637022184181022e-06, "epoch": 17.04002079002079, "percentage": 85.2, "elapsed_time": "1:19:00", "remaining_time": "0:13:43", "throughput": 1318.26, "total_tokens": 6249704}
|
| 6575 |
+
{"current_steps": 32790, "total_steps": 38480, "loss": 0.1791, "lr": 3.258102691573231e-06, "epoch": 17.042619542619544, "percentage": 85.21, "elapsed_time": "1:19:01", "remaining_time": "0:13:42", "throughput": 1318.28, "total_tokens": 6250696}
|
| 6576 |
+
{"current_steps": 32795, "total_steps": 38480, "loss": 0.231, "lr": 3.2525076375547387e-06, "epoch": 17.045218295218294, "percentage": 85.23, "elapsed_time": "1:19:02", "remaining_time": "0:13:42", "throughput": 1318.28, "total_tokens": 6251624}
|
| 6577 |
+
{"current_steps": 32800, "total_steps": 38480, "loss": 0.0842, "lr": 3.246917057513671e-06, "epoch": 17.04781704781705, "percentage": 85.24, "elapsed_time": "1:19:02", "remaining_time": "0:13:41", "throughput": 1318.29, "total_tokens": 6252584}
|
| 6578 |
+
{"current_steps": 32805, "total_steps": 38480, "loss": 0.0403, "lr": 3.2413309526001413e-06, "epoch": 17.050415800415802, "percentage": 85.25, "elapsed_time": "1:19:03", "remaining_time": "0:13:40", "throughput": 1318.29, "total_tokens": 6253512}
|
| 6579 |
+
{"current_steps": 32810, "total_steps": 38480, "loss": 0.1917, "lr": 3.235749323963344e-06, "epoch": 17.053014553014552, "percentage": 85.27, "elapsed_time": "1:19:04", "remaining_time": "0:13:39", "throughput": 1318.3, "total_tokens": 6254504}
|
| 6580 |
+
{"current_steps": 32815, "total_steps": 38480, "loss": 0.0736, "lr": 3.2301721727515442e-06, "epoch": 17.055613305613306, "percentage": 85.28, "elapsed_time": "1:19:05", "remaining_time": "0:13:39", "throughput": 1318.3, "total_tokens": 6255432}
|
| 6581 |
+
{"current_steps": 32820, "total_steps": 38480, "loss": 0.0658, "lr": 3.2245995001121106e-06, "epoch": 17.058212058212057, "percentage": 85.29, "elapsed_time": "1:19:05", "remaining_time": "0:13:38", "throughput": 1318.3, "total_tokens": 6256328}
|
| 6582 |
+
{"current_steps": 32825, "total_steps": 38480, "loss": 0.1309, "lr": 3.219031307191467e-06, "epoch": 17.06081081081081, "percentage": 85.3, "elapsed_time": "1:19:06", "remaining_time": "0:13:37", "throughput": 1318.3, "total_tokens": 6257288}
|
| 6583 |
+
{"current_steps": 32830, "total_steps": 38480, "loss": 0.0707, "lr": 3.213467595135122e-06, "epoch": 17.063409563409564, "percentage": 85.32, "elapsed_time": "1:19:07", "remaining_time": "0:13:36", "throughput": 1318.31, "total_tokens": 6258248}
|
| 6584 |
+
{"current_steps": 32835, "total_steps": 38480, "loss": 0.0815, "lr": 3.207908365087661e-06, "epoch": 17.066008316008315, "percentage": 85.33, "elapsed_time": "1:19:07", "remaining_time": "0:13:36", "throughput": 1318.33, "total_tokens": 6259272}
|
| 6585 |
+
{"current_steps": 32840, "total_steps": 38480, "loss": 0.0663, "lr": 3.2023536181927632e-06, "epoch": 17.06860706860707, "percentage": 85.34, "elapsed_time": "1:19:08", "remaining_time": "0:13:35", "throughput": 1318.33, "total_tokens": 6260168}
|
| 6586 |
+
{"current_steps": 32845, "total_steps": 38480, "loss": 0.4234, "lr": 3.1968033555931586e-06, "epoch": 17.071205821205822, "percentage": 85.36, "elapsed_time": "1:19:09", "remaining_time": "0:13:34", "throughput": 1318.34, "total_tokens": 6261160}
|
| 6587 |
+
{"current_steps": 32850, "total_steps": 38480, "loss": 0.0953, "lr": 3.1912575784306704e-06, "epoch": 17.073804573804573, "percentage": 85.37, "elapsed_time": "1:19:09", "remaining_time": "0:13:34", "throughput": 1318.34, "total_tokens": 6262056}
|
| 6588 |
+
{"current_steps": 32855, "total_steps": 38480, "loss": 0.1524, "lr": 3.185716287846202e-06, "epoch": 17.076403326403327, "percentage": 85.38, "elapsed_time": "1:19:10", "remaining_time": "0:13:33", "throughput": 1318.35, "total_tokens": 6263048}
|
| 6589 |
+
{"current_steps": 32860, "total_steps": 38480, "loss": 0.0407, "lr": 3.1801794849797285e-06, "epoch": 17.07900207900208, "percentage": 85.4, "elapsed_time": "1:19:11", "remaining_time": "0:13:32", "throughput": 1318.35, "total_tokens": 6263944}
|
| 6590 |
+
{"current_steps": 32865, "total_steps": 38480, "loss": 0.1851, "lr": 3.1746471709702964e-06, "epoch": 17.08160083160083, "percentage": 85.41, "elapsed_time": "1:19:12", "remaining_time": "0:13:31", "throughput": 1318.36, "total_tokens": 6264936}
|
| 6591 |
+
{"current_steps": 32870, "total_steps": 38480, "loss": 0.1014, "lr": 3.1691193469560426e-06, "epoch": 17.084199584199585, "percentage": 85.42, "elapsed_time": "1:19:12", "remaining_time": "0:13:31", "throughput": 1318.37, "total_tokens": 6265896}
|
| 6592 |
+
{"current_steps": 32875, "total_steps": 38480, "loss": 0.1383, "lr": 3.163596014074169e-06, "epoch": 17.08679833679834, "percentage": 85.43, "elapsed_time": "1:19:13", "remaining_time": "0:13:30", "throughput": 1318.36, "total_tokens": 6266792}
|
| 6593 |
+
{"current_steps": 32880, "total_steps": 38480, "loss": 0.1869, "lr": 3.158077173460955e-06, "epoch": 17.08939708939709, "percentage": 85.45, "elapsed_time": "1:19:14", "remaining_time": "0:13:29", "throughput": 1318.36, "total_tokens": 6267720}
|
| 6594 |
+
{"current_steps": 32885, "total_steps": 38480, "loss": 0.0476, "lr": 3.1525628262517536e-06, "epoch": 17.091995841995843, "percentage": 85.46, "elapsed_time": "1:19:14", "remaining_time": "0:13:28", "throughput": 1318.37, "total_tokens": 6268648}
|
| 6595 |
+
{"current_steps": 32890, "total_steps": 38480, "loss": 0.1416, "lr": 3.1470529735810077e-06, "epoch": 17.094594594594593, "percentage": 85.47, "elapsed_time": "1:19:15", "remaining_time": "0:13:28", "throughput": 1318.38, "total_tokens": 6269640}
|
| 6596 |
+
{"current_steps": 32895, "total_steps": 38480, "loss": 0.0726, "lr": 3.1415476165822215e-06, "epoch": 17.097193347193347, "percentage": 85.49, "elapsed_time": "1:19:16", "remaining_time": "0:13:27", "throughput": 1318.39, "total_tokens": 6270600}
|
| 6597 |
+
{"current_steps": 32900, "total_steps": 38480, "loss": 0.1795, "lr": 3.1360467563879753e-06, "epoch": 17.0997920997921, "percentage": 85.5, "elapsed_time": "1:19:16", "remaining_time": "0:13:26", "throughput": 1318.4, "total_tokens": 6271560}
|
| 6598 |
+
{"current_steps": 32905, "total_steps": 38480, "loss": 0.0518, "lr": 3.13055039412993e-06, "epoch": 17.10239085239085, "percentage": 85.51, "elapsed_time": "1:19:17", "remaining_time": "0:13:26", "throughput": 1318.4, "total_tokens": 6272488}
|
| 6599 |
+
{"current_steps": 32910, "total_steps": 38480, "loss": 0.1865, "lr": 3.125058530938818e-06, "epoch": 17.104989604989605, "percentage": 85.52, "elapsed_time": "1:19:18", "remaining_time": "0:13:25", "throughput": 1318.39, "total_tokens": 6273384}
|
| 6600 |
+
{"current_steps": 32915, "total_steps": 38480, "loss": 0.116, "lr": 3.1195711679444433e-06, "epoch": 17.10758835758836, "percentage": 85.54, "elapsed_time": "1:19:19", "remaining_time": "0:13:24", "throughput": 1318.4, "total_tokens": 6274344}
|
| 6601 |
+
{"current_steps": 32920, "total_steps": 38480, "loss": 0.093, "lr": 3.1140883062756835e-06, "epoch": 17.11018711018711, "percentage": 85.55, "elapsed_time": "1:19:19", "remaining_time": "0:13:23", "throughput": 1318.41, "total_tokens": 6275304}
|
| 6602 |
+
{"current_steps": 32925, "total_steps": 38480, "loss": 0.1295, "lr": 3.108609947060506e-06, "epoch": 17.112785862785863, "percentage": 85.56, "elapsed_time": "1:19:20", "remaining_time": "0:13:23", "throughput": 1318.4, "total_tokens": 6276200}
|
| 6603 |
+
{"current_steps": 32930, "total_steps": 38480, "loss": 0.1506, "lr": 3.1031360914259345e-06, "epoch": 17.115384615384617, "percentage": 85.58, "elapsed_time": "1:19:21", "remaining_time": "0:13:22", "throughput": 1318.41, "total_tokens": 6277160}
|
| 6604 |
+
{"current_steps": 32935, "total_steps": 38480, "loss": 0.1139, "lr": 3.0976667404980703e-06, "epoch": 17.117983367983367, "percentage": 85.59, "elapsed_time": "1:19:21", "remaining_time": "0:13:21", "throughput": 1318.43, "total_tokens": 6278152}
|
| 6605 |
+
{"current_steps": 32940, "total_steps": 38480, "loss": 0.1256, "lr": 3.092201895402083e-06, "epoch": 17.12058212058212, "percentage": 85.6, "elapsed_time": "1:19:22", "remaining_time": "0:13:20", "throughput": 1318.44, "total_tokens": 6279112}
|
| 6606 |
+
{"current_steps": 32945, "total_steps": 38480, "loss": 0.1428, "lr": 3.086741557262235e-06, "epoch": 17.123180873180875, "percentage": 85.62, "elapsed_time": "1:19:23", "remaining_time": "0:13:20", "throughput": 1318.45, "total_tokens": 6280104}
|
| 6607 |
+
{"current_steps": 32950, "total_steps": 38480, "loss": 0.1229, "lr": 3.081285727201841e-06, "epoch": 17.125779625779625, "percentage": 85.63, "elapsed_time": "1:19:23", "remaining_time": "0:13:19", "throughput": 1318.45, "total_tokens": 6281000}
|
| 6608 |
+
{"current_steps": 32955, "total_steps": 38480, "loss": 0.22, "lr": 3.0758344063432922e-06, "epoch": 17.12837837837838, "percentage": 85.64, "elapsed_time": "1:19:24", "remaining_time": "0:13:18", "throughput": 1318.47, "total_tokens": 6282024}
|
| 6609 |
+
{"current_steps": 32960, "total_steps": 38480, "loss": 0.096, "lr": 3.0703875958080553e-06, "epoch": 17.13097713097713, "percentage": 85.65, "elapsed_time": "1:19:25", "remaining_time": "0:13:18", "throughput": 1318.47, "total_tokens": 6282920}
|
| 6610 |
+
{"current_steps": 32965, "total_steps": 38480, "loss": 0.1316, "lr": 3.0649452967166785e-06, "epoch": 17.133575883575883, "percentage": 85.67, "elapsed_time": "1:19:26", "remaining_time": "0:13:17", "throughput": 1318.48, "total_tokens": 6283912}
|
| 6611 |
+
{"current_steps": 32970, "total_steps": 38480, "loss": 0.1558, "lr": 3.0595075101887694e-06, "epoch": 17.136174636174637, "percentage": 85.68, "elapsed_time": "1:19:26", "remaining_time": "0:13:16", "throughput": 1318.49, "total_tokens": 6284872}
|
| 6612 |
+
{"current_steps": 32975, "total_steps": 38480, "loss": 0.1461, "lr": 3.0540742373429964e-06, "epoch": 17.138773388773387, "percentage": 85.69, "elapsed_time": "1:19:27", "remaining_time": "0:13:15", "throughput": 1318.5, "total_tokens": 6285832}
|
| 6613 |
+
{"current_steps": 32980, "total_steps": 38480, "loss": 0.1376, "lr": 3.048645479297127e-06, "epoch": 17.14137214137214, "percentage": 85.71, "elapsed_time": "1:19:28", "remaining_time": "0:13:15", "throughput": 1318.51, "total_tokens": 6286792}
|
| 6614 |
+
{"current_steps": 32985, "total_steps": 38480, "loss": 0.1536, "lr": 3.0432212371679867e-06, "epoch": 17.143970893970895, "percentage": 85.72, "elapsed_time": "1:19:28", "remaining_time": "0:13:14", "throughput": 1318.53, "total_tokens": 6287816}
|
| 6615 |
+
{"current_steps": 32990, "total_steps": 38480, "loss": 0.0974, "lr": 3.0378015120714586e-06, "epoch": 17.146569646569645, "percentage": 85.73, "elapsed_time": "1:19:29", "remaining_time": "0:13:13", "throughput": 1318.54, "total_tokens": 6288776}
|
| 6616 |
+
{"current_steps": 32995, "total_steps": 38480, "loss": 0.1714, "lr": 3.0323863051225256e-06, "epoch": 17.1491683991684, "percentage": 85.75, "elapsed_time": "1:19:30", "remaining_time": "0:13:12", "throughput": 1318.53, "total_tokens": 6289640}
|
| 6617 |
+
{"current_steps": 33000, "total_steps": 38480, "loss": 0.0774, "lr": 3.0269756174352152e-06, "epoch": 17.151767151767153, "percentage": 85.76, "elapsed_time": "1:19:30", "remaining_time": "0:13:12", "throughput": 1318.54, "total_tokens": 6290600}
|
| 6618 |
+
{"current_steps": 33005, "total_steps": 38480, "loss": 0.181, "lr": 3.0215694501226384e-06, "epoch": 17.154365904365903, "percentage": 85.77, "elapsed_time": "1:19:31", "remaining_time": "0:13:11", "throughput": 1318.55, "total_tokens": 6291592}
|
| 6619 |
+
{"current_steps": 33010, "total_steps": 38480, "loss": 0.1515, "lr": 3.0161678042969627e-06, "epoch": 17.156964656964657, "percentage": 85.78, "elapsed_time": "1:19:32", "remaining_time": "0:13:10", "throughput": 1318.56, "total_tokens": 6292552}
|
| 6620 |
+
{"current_steps": 33015, "total_steps": 38480, "loss": 0.1056, "lr": 3.0107706810694523e-06, "epoch": 17.15956340956341, "percentage": 85.8, "elapsed_time": "1:19:32", "remaining_time": "0:13:10", "throughput": 1318.57, "total_tokens": 6293512}
|
| 6621 |
+
{"current_steps": 33020, "total_steps": 38480, "loss": 0.1067, "lr": 3.005378081550414e-06, "epoch": 17.16216216216216, "percentage": 85.81, "elapsed_time": "1:19:33", "remaining_time": "0:13:09", "throughput": 1318.59, "total_tokens": 6294536}
|
| 6622 |
+
{"current_steps": 33025, "total_steps": 38480, "loss": 0.1216, "lr": 2.9999900068492374e-06, "epoch": 17.164760914760915, "percentage": 85.82, "elapsed_time": "1:19:34", "remaining_time": "0:13:08", "throughput": 1318.58, "total_tokens": 6295432}
|
| 6623 |
+
{"current_steps": 33030, "total_steps": 38480, "loss": 0.2168, "lr": 2.994606458074367e-06, "epoch": 17.16735966735967, "percentage": 85.84, "elapsed_time": "1:19:35", "remaining_time": "0:13:07", "throughput": 1318.59, "total_tokens": 6296392}
|
| 6624 |
+
{"current_steps": 33035, "total_steps": 38480, "loss": 0.1081, "lr": 2.98922743633335e-06, "epoch": 17.16995841995842, "percentage": 85.85, "elapsed_time": "1:19:35", "remaining_time": "0:13:07", "throughput": 1318.59, "total_tokens": 6297320}
|
| 6625 |
+
{"current_steps": 33040, "total_steps": 38480, "loss": 0.1069, "lr": 2.9838529427327594e-06, "epoch": 17.172557172557173, "percentage": 85.86, "elapsed_time": "1:19:36", "remaining_time": "0:13:06", "throughput": 1318.61, "total_tokens": 6298312}
|
| 6626 |
+
{"current_steps": 33045, "total_steps": 38480, "loss": 0.1861, "lr": 2.978482978378258e-06, "epoch": 17.175155925155924, "percentage": 85.88, "elapsed_time": "1:19:37", "remaining_time": "0:13:05", "throughput": 1318.62, "total_tokens": 6299272}
|
| 6627 |
+
{"current_steps": 33050, "total_steps": 38480, "loss": 0.0851, "lr": 2.9731175443745818e-06, "epoch": 17.177754677754677, "percentage": 85.89, "elapsed_time": "1:19:37", "remaining_time": "0:13:04", "throughput": 1318.62, "total_tokens": 6300232}
|
| 6628 |
+
{"current_steps": 33055, "total_steps": 38480, "loss": 0.1556, "lr": 2.967756641825531e-06, "epoch": 17.18035343035343, "percentage": 85.9, "elapsed_time": "1:19:38", "remaining_time": "0:13:04", "throughput": 1318.63, "total_tokens": 6301192}
|
| 6629 |
+
{"current_steps": 33060, "total_steps": 38480, "loss": 0.0775, "lr": 2.962400271833965e-06, "epoch": 17.18295218295218, "percentage": 85.91, "elapsed_time": "1:19:39", "remaining_time": "0:13:03", "throughput": 1318.65, "total_tokens": 6302216}
|
| 6630 |
+
{"current_steps": 33065, "total_steps": 38480, "loss": 0.1791, "lr": 2.957048435501811e-06, "epoch": 17.185550935550935, "percentage": 85.93, "elapsed_time": "1:19:39", "remaining_time": "0:13:02", "throughput": 1318.65, "total_tokens": 6303144}
|
| 6631 |
+
{"current_steps": 33070, "total_steps": 38480, "loss": 0.1199, "lr": 2.9517011339300848e-06, "epoch": 17.18814968814969, "percentage": 85.94, "elapsed_time": "1:19:40", "remaining_time": "0:13:02", "throughput": 1318.65, "total_tokens": 6304040}
|
| 6632 |
+
{"current_steps": 33075, "total_steps": 38480, "loss": 0.2419, "lr": 2.9463583682188433e-06, "epoch": 17.19074844074844, "percentage": 85.95, "elapsed_time": "1:19:41", "remaining_time": "0:13:01", "throughput": 1318.65, "total_tokens": 6305000}
|
| 6633 |
+
{"current_steps": 33080, "total_steps": 38480, "loss": 0.0931, "lr": 2.9410201394672265e-06, "epoch": 17.193347193347194, "percentage": 85.97, "elapsed_time": "1:19:42", "remaining_time": "0:13:00", "throughput": 1318.67, "total_tokens": 6305992}
|
| 6634 |
+
{"current_steps": 33085, "total_steps": 38480, "loss": 0.0903, "lr": 2.9356864487734254e-06, "epoch": 17.195945945945947, "percentage": 85.98, "elapsed_time": "1:19:42", "remaining_time": "0:12:59", "throughput": 1318.66, "total_tokens": 6306888}
|
| 6635 |
+
{"current_steps": 33090, "total_steps": 38480, "loss": 0.1108, "lr": 2.930357297234718e-06, "epoch": 17.198544698544698, "percentage": 85.99, "elapsed_time": "1:19:43", "remaining_time": "0:12:59", "throughput": 1318.68, "total_tokens": 6307880}
|
| 6636 |
+
{"current_steps": 33095, "total_steps": 38480, "loss": 0.1373, "lr": 2.925032685947432e-06, "epoch": 17.20114345114345, "percentage": 86.01, "elapsed_time": "1:19:44", "remaining_time": "0:12:58", "throughput": 1318.69, "total_tokens": 6308840}
|
| 6637 |
+
{"current_steps": 33100, "total_steps": 38480, "loss": 0.0943, "lr": 2.91971261600697e-06, "epoch": 17.203742203742205, "percentage": 86.02, "elapsed_time": "1:19:44", "remaining_time": "0:12:57", "throughput": 1318.71, "total_tokens": 6309864}
|
| 6638 |
+
{"current_steps": 33105, "total_steps": 38480, "loss": 0.151, "lr": 2.914397088507795e-06, "epoch": 17.206340956340956, "percentage": 86.03, "elapsed_time": "1:19:45", "remaining_time": "0:12:56", "throughput": 1318.71, "total_tokens": 6310824}
|
| 6639 |
+
{"current_steps": 33110, "total_steps": 38480, "loss": 0.1461, "lr": 2.9090861045434405e-06, "epoch": 17.20893970893971, "percentage": 86.04, "elapsed_time": "1:19:46", "remaining_time": "0:12:56", "throughput": 1318.72, "total_tokens": 6311784}
|
| 6640 |
+
{"current_steps": 33115, "total_steps": 38480, "loss": 0.1885, "lr": 2.903779665206491e-06, "epoch": 17.21153846153846, "percentage": 86.06, "elapsed_time": "1:19:46", "remaining_time": "0:12:55", "throughput": 1318.72, "total_tokens": 6312712}
|
| 6641 |
+
{"current_steps": 33120, "total_steps": 38480, "loss": 0.0528, "lr": 2.898477771588626e-06, "epoch": 17.214137214137214, "percentage": 86.07, "elapsed_time": "1:19:47", "remaining_time": "0:12:54", "throughput": 1318.73, "total_tokens": 6313672}
|
| 6642 |
+
{"current_steps": 33125, "total_steps": 38480, "loss": 0.0765, "lr": 2.893180424780559e-06, "epoch": 17.216735966735968, "percentage": 86.08, "elapsed_time": "1:19:48", "remaining_time": "0:12:54", "throughput": 1318.73, "total_tokens": 6314600}
|
| 6643 |
+
{"current_steps": 33130, "total_steps": 38480, "loss": 0.0573, "lr": 2.887887625872085e-06, "epoch": 17.219334719334718, "percentage": 86.1, "elapsed_time": "1:19:49", "remaining_time": "0:12:53", "throughput": 1318.73, "total_tokens": 6315496}
|
| 6644 |
+
{"current_steps": 33135, "total_steps": 38480, "loss": 0.1907, "lr": 2.88259937595205e-06, "epoch": 17.221933471933472, "percentage": 86.11, "elapsed_time": "1:19:49", "remaining_time": "0:12:52", "throughput": 1318.73, "total_tokens": 6316456}
|
| 6645 |
+
{"current_steps": 33140, "total_steps": 38480, "loss": 0.1397, "lr": 2.877315676108386e-06, "epoch": 17.224532224532226, "percentage": 86.12, "elapsed_time": "1:19:50", "remaining_time": "0:12:51", "throughput": 1318.73, "total_tokens": 6317352}
|
| 6646 |
+
{"current_steps": 33145, "total_steps": 38480, "loss": 0.215, "lr": 2.8720365274280738e-06, "epoch": 17.227130977130976, "percentage": 86.14, "elapsed_time": "1:19:51", "remaining_time": "0:12:51", "throughput": 1318.74, "total_tokens": 6318312}
|
| 6647 |
+
{"current_steps": 33150, "total_steps": 38480, "loss": 0.1588, "lr": 2.8667619309971526e-06, "epoch": 17.22972972972973, "percentage": 86.15, "elapsed_time": "1:19:51", "remaining_time": "0:12:50", "throughput": 1318.73, "total_tokens": 6319176}
|
| 6648 |
+
{"current_steps": 33155, "total_steps": 38480, "loss": 0.0841, "lr": 2.861491887900733e-06, "epoch": 17.232328482328484, "percentage": 86.16, "elapsed_time": "1:19:52", "remaining_time": "0:12:49", "throughput": 1318.73, "total_tokens": 6320104}
|
| 6649 |
+
{"current_steps": 33160, "total_steps": 38480, "loss": 0.0531, "lr": 2.856226399222997e-06, "epoch": 17.234927234927234, "percentage": 86.17, "elapsed_time": "1:19:53", "remaining_time": "0:12:49", "throughput": 1318.75, "total_tokens": 6321128}
|
| 6650 |
+
{"current_steps": 33165, "total_steps": 38480, "loss": 0.0998, "lr": 2.850965466047181e-06, "epoch": 17.237525987525988, "percentage": 86.19, "elapsed_time": "1:19:53", "remaining_time": "0:12:48", "throughput": 1318.76, "total_tokens": 6322088}
|
| 6651 |
+
{"current_steps": 33170, "total_steps": 38480, "loss": 0.0766, "lr": 2.84570908945557e-06, "epoch": 17.24012474012474, "percentage": 86.2, "elapsed_time": "1:19:54", "remaining_time": "0:12:47", "throughput": 1318.77, "total_tokens": 6323080}
|
| 6652 |
+
{"current_steps": 33175, "total_steps": 38480, "loss": 0.0813, "lr": 2.8404572705295395e-06, "epoch": 17.242723492723492, "percentage": 86.21, "elapsed_time": "1:19:55", "remaining_time": "0:12:46", "throughput": 1318.78, "total_tokens": 6324040}
|
| 6653 |
+
{"current_steps": 33180, "total_steps": 38480, "loss": 0.0966, "lr": 2.835210010349512e-06, "epoch": 17.245322245322246, "percentage": 86.23, "elapsed_time": "1:19:56", "remaining_time": "0:12:46", "throughput": 1318.8, "total_tokens": 6325064}
|
| 6654 |
+
{"current_steps": 33185, "total_steps": 38480, "loss": 0.1527, "lr": 2.8299673099949684e-06, "epoch": 17.247920997920996, "percentage": 86.24, "elapsed_time": "1:19:56", "remaining_time": "0:12:45", "throughput": 1318.81, "total_tokens": 6326024}
|
| 6655 |
+
{"current_steps": 33190, "total_steps": 38480, "loss": 0.1719, "lr": 2.8247291705444575e-06, "epoch": 17.25051975051975, "percentage": 86.25, "elapsed_time": "1:19:57", "remaining_time": "0:12:44", "throughput": 1318.81, "total_tokens": 6326952}
|
| 6656 |
+
{"current_steps": 33195, "total_steps": 38480, "loss": 0.1469, "lr": 2.8194955930755967e-06, "epoch": 17.253118503118504, "percentage": 86.27, "elapsed_time": "1:19:58", "remaining_time": "0:12:43", "throughput": 1318.82, "total_tokens": 6327912}
|
| 6657 |
+
{"current_steps": 33200, "total_steps": 38480, "loss": 0.1419, "lr": 2.8142665786650534e-06, "epoch": 17.255717255717254, "percentage": 86.28, "elapsed_time": "1:19:58", "remaining_time": "0:12:43", "throughput": 1318.83, "total_tokens": 6328904}
|
| 6658 |
+
{"current_steps": 33205, "total_steps": 38480, "loss": 0.1807, "lr": 2.8090421283885586e-06, "epoch": 17.258316008316008, "percentage": 86.29, "elapsed_time": "1:19:59", "remaining_time": "0:12:42", "throughput": 1318.83, "total_tokens": 6329832}
|
| 6659 |
+
{"current_steps": 33210, "total_steps": 38480, "loss": 0.0774, "lr": 2.8038222433209057e-06, "epoch": 17.260914760914762, "percentage": 86.3, "elapsed_time": "1:20:00", "remaining_time": "0:12:41", "throughput": 1318.83, "total_tokens": 6330728}
|
| 6660 |
+
{"current_steps": 33215, "total_steps": 38480, "loss": 0.0908, "lr": 2.798606924535954e-06, "epoch": 17.263513513513512, "percentage": 86.32, "elapsed_time": "1:20:00", "remaining_time": "0:12:41", "throughput": 1318.83, "total_tokens": 6331656}
|
| 6661 |
+
{"current_steps": 33220, "total_steps": 38480, "loss": 0.1604, "lr": 2.793396173106619e-06, "epoch": 17.266112266112266, "percentage": 86.33, "elapsed_time": "1:20:01", "remaining_time": "0:12:40", "throughput": 1318.84, "total_tokens": 6332616}
|
| 6662 |
+
{"current_steps": 33225, "total_steps": 38480, "loss": 0.1427, "lr": 2.788189990104867e-06, "epoch": 17.26871101871102, "percentage": 86.34, "elapsed_time": "1:20:02", "remaining_time": "0:12:39", "throughput": 1318.85, "total_tokens": 6333576}
|
| 6663 |
+
{"current_steps": 33230, "total_steps": 38480, "loss": 0.1382, "lr": 2.78298837660175e-06, "epoch": 17.27130977130977, "percentage": 86.36, "elapsed_time": "1:20:03", "remaining_time": "0:12:38", "throughput": 1318.87, "total_tokens": 6334600}
|
| 6664 |
+
{"current_steps": 33235, "total_steps": 38480, "loss": 0.0945, "lr": 2.777791333667351e-06, "epoch": 17.273908523908524, "percentage": 86.37, "elapsed_time": "1:20:03", "remaining_time": "0:12:38", "throughput": 1318.87, "total_tokens": 6335560}
|
| 6665 |
+
{"current_steps": 33240, "total_steps": 38480, "loss": 0.224, "lr": 2.772598862370826e-06, "epoch": 17.276507276507278, "percentage": 86.38, "elapsed_time": "1:20:04", "remaining_time": "0:12:37", "throughput": 1318.87, "total_tokens": 6336456}
|
| 6666 |
+
{"current_steps": 33245, "total_steps": 38480, "loss": 0.0881, "lr": 2.767410963780395e-06, "epoch": 17.27910602910603, "percentage": 86.4, "elapsed_time": "1:20:05", "remaining_time": "0:12:36", "throughput": 1318.88, "total_tokens": 6337448}
|
| 6667 |
+
{"current_steps": 33250, "total_steps": 38480, "loss": 0.1471, "lr": 2.7622276389633328e-06, "epoch": 17.281704781704782, "percentage": 86.41, "elapsed_time": "1:20:05", "remaining_time": "0:12:35", "throughput": 1318.91, "total_tokens": 6338472}
|
| 6668 |
+
{"current_steps": 33255, "total_steps": 38480, "loss": 0.2124, "lr": 2.757048888985969e-06, "epoch": 17.284303534303533, "percentage": 86.42, "elapsed_time": "1:20:06", "remaining_time": "0:12:35", "throughput": 1318.91, "total_tokens": 6339432}
|
| 6669 |
+
{"current_steps": 33260, "total_steps": 38480, "loss": 0.0359, "lr": 2.751874714913694e-06, "epoch": 17.286902286902286, "percentage": 86.43, "elapsed_time": "1:20:07", "remaining_time": "0:12:34", "throughput": 1318.92, "total_tokens": 6340392}
|
| 6670 |
+
{"current_steps": 33265, "total_steps": 38480, "loss": 0.0786, "lr": 2.7467051178109685e-06, "epoch": 17.28950103950104, "percentage": 86.45, "elapsed_time": "1:20:07", "remaining_time": "0:12:33", "throughput": 1318.94, "total_tokens": 6341384}
|
| 6671 |
+
{"current_steps": 33270, "total_steps": 38480, "loss": 0.168, "lr": 2.7415400987412935e-06, "epoch": 17.29209979209979, "percentage": 86.46, "elapsed_time": "1:20:08", "remaining_time": "0:12:33", "throughput": 1318.92, "total_tokens": 6342248}
|
| 6672 |
+
{"current_steps": 33275, "total_steps": 38480, "loss": 0.1015, "lr": 2.736379658767238e-06, "epoch": 17.294698544698544, "percentage": 86.47, "elapsed_time": "1:20:09", "remaining_time": "0:12:32", "throughput": 1318.93, "total_tokens": 6343208}
|
| 6673 |
+
{"current_steps": 33280, "total_steps": 38480, "loss": 0.1734, "lr": 2.7312237989504247e-06, "epoch": 17.2972972972973, "percentage": 86.49, "elapsed_time": "1:20:10", "remaining_time": "0:12:31", "throughput": 1318.93, "total_tokens": 6344136}
|
| 6674 |
+
{"current_steps": 33285, "total_steps": 38480, "loss": 0.0849, "lr": 2.726072520351544e-06, "epoch": 17.29989604989605, "percentage": 86.5, "elapsed_time": "1:20:10", "remaining_time": "0:12:30", "throughput": 1318.94, "total_tokens": 6345096}
|
| 6675 |
+
{"current_steps": 33290, "total_steps": 38480, "loss": 0.1857, "lr": 2.720925824030335e-06, "epoch": 17.302494802494802, "percentage": 86.51, "elapsed_time": "1:20:11", "remaining_time": "0:12:30", "throughput": 1318.96, "total_tokens": 6346120}
|
| 6676 |
+
{"current_steps": 33295, "total_steps": 38480, "loss": 0.2038, "lr": 2.7157837110455913e-06, "epoch": 17.305093555093556, "percentage": 86.53, "elapsed_time": "1:20:12", "remaining_time": "0:12:29", "throughput": 1318.97, "total_tokens": 6347080}
|
| 6677 |
+
{"current_steps": 33300, "total_steps": 38480, "loss": 0.0738, "lr": 2.710646182455173e-06, "epoch": 17.307692307692307, "percentage": 86.54, "elapsed_time": "1:20:12", "remaining_time": "0:12:28", "throughput": 1318.97, "total_tokens": 6348008}
|
| 6678 |
+
{"current_steps": 33305, "total_steps": 38480, "loss": 0.0776, "lr": 2.705513239315988e-06, "epoch": 17.31029106029106, "percentage": 86.55, "elapsed_time": "1:20:13", "remaining_time": "0:12:27", "throughput": 1318.99, "total_tokens": 6349032}
|
| 6679 |
+
{"current_steps": 33310, "total_steps": 38480, "loss": 0.1583, "lr": 2.7003848826840067e-06, "epoch": 17.312889812889814, "percentage": 86.56, "elapsed_time": "1:20:14", "remaining_time": "0:12:27", "throughput": 1319.0, "total_tokens": 6349960}
|
| 6680 |
+
{"current_steps": 33315, "total_steps": 38480, "loss": 0.1293, "lr": 2.6952611136142477e-06, "epoch": 17.315488565488565, "percentage": 86.58, "elapsed_time": "1:20:14", "remaining_time": "0:12:26", "throughput": 1318.99, "total_tokens": 6350856}
|
| 6681 |
+
{"current_steps": 33320, "total_steps": 38480, "loss": 0.1131, "lr": 2.6901419331608057e-06, "epoch": 17.31808731808732, "percentage": 86.59, "elapsed_time": "1:20:15", "remaining_time": "0:12:25", "throughput": 1319.01, "total_tokens": 6351912}
|
| 6682 |
+
{"current_steps": 33325, "total_steps": 38480, "loss": 0.1406, "lr": 2.6850273423768115e-06, "epoch": 17.320686070686072, "percentage": 86.6, "elapsed_time": "1:20:16", "remaining_time": "0:12:25", "throughput": 1319.03, "total_tokens": 6352904}
|
| 6683 |
+
{"current_steps": 33330, "total_steps": 38480, "loss": 0.1159, "lr": 2.6799173423144584e-06, "epoch": 17.323284823284823, "percentage": 86.62, "elapsed_time": "1:20:17", "remaining_time": "0:12:24", "throughput": 1319.04, "total_tokens": 6353864}
|
| 6684 |
+
{"current_steps": 33335, "total_steps": 38480, "loss": 0.1723, "lr": 2.67481193402499e-06, "epoch": 17.325883575883577, "percentage": 86.63, "elapsed_time": "1:20:17", "remaining_time": "0:12:23", "throughput": 1319.04, "total_tokens": 6354792}
|
| 6685 |
+
{"current_steps": 33340, "total_steps": 38480, "loss": 0.1511, "lr": 2.669711118558718e-06, "epoch": 17.328482328482327, "percentage": 86.64, "elapsed_time": "1:20:18", "remaining_time": "0:12:22", "throughput": 1319.06, "total_tokens": 6355816}
|
| 6686 |
+
{"current_steps": 33345, "total_steps": 38480, "loss": 0.2352, "lr": 2.664614896965001e-06, "epoch": 17.33108108108108, "percentage": 86.66, "elapsed_time": "1:20:19", "remaining_time": "0:12:22", "throughput": 1319.08, "total_tokens": 6356840}
|
| 6687 |
+
{"current_steps": 33350, "total_steps": 38480, "loss": 0.1009, "lr": 2.659523270292244e-06, "epoch": 17.333679833679835, "percentage": 86.67, "elapsed_time": "1:20:19", "remaining_time": "0:12:21", "throughput": 1319.09, "total_tokens": 6357800}
|
| 6688 |
+
{"current_steps": 33355, "total_steps": 38480, "loss": 0.1423, "lr": 2.6544362395879317e-06, "epoch": 17.336278586278585, "percentage": 86.68, "elapsed_time": "1:20:20", "remaining_time": "0:12:20", "throughput": 1319.09, "total_tokens": 6358728}
|
| 6689 |
+
{"current_steps": 33360, "total_steps": 38480, "loss": 0.0572, "lr": 2.6493538058985775e-06, "epoch": 17.33887733887734, "percentage": 86.69, "elapsed_time": "1:20:21", "remaining_time": "0:12:19", "throughput": 1319.1, "total_tokens": 6359720}
|
| 6690 |
+
{"current_steps": 33365, "total_steps": 38480, "loss": 0.1425, "lr": 2.6442759702697606e-06, "epoch": 17.341476091476093, "percentage": 86.71, "elapsed_time": "1:20:21", "remaining_time": "0:12:19", "throughput": 1319.09, "total_tokens": 6360584}
|
| 6691 |
+
{"current_steps": 33370, "total_steps": 38480, "loss": 0.0865, "lr": 2.639202733746113e-06, "epoch": 17.344074844074843, "percentage": 86.72, "elapsed_time": "1:20:22", "remaining_time": "0:12:18", "throughput": 1319.11, "total_tokens": 6361576}
|
| 6692 |
+
{"current_steps": 33375, "total_steps": 38480, "loss": 0.0978, "lr": 2.6341340973713187e-06, "epoch": 17.346673596673597, "percentage": 86.73, "elapsed_time": "1:20:23", "remaining_time": "0:12:17", "throughput": 1319.11, "total_tokens": 6362504}
|
| 6693 |
+
{"current_steps": 33380, "total_steps": 38480, "loss": 0.0318, "lr": 2.629070062188119e-06, "epoch": 17.34927234927235, "percentage": 86.75, "elapsed_time": "1:20:24", "remaining_time": "0:12:17", "throughput": 1319.1, "total_tokens": 6363400}
|
| 6694 |
+
{"current_steps": 33385, "total_steps": 38480, "loss": 0.0999, "lr": 2.6240106292383022e-06, "epoch": 17.3518711018711, "percentage": 86.76, "elapsed_time": "1:20:24", "remaining_time": "0:12:16", "throughput": 1319.11, "total_tokens": 6364328}
|
| 6695 |
+
{"current_steps": 33390, "total_steps": 38480, "loss": 0.13, "lr": 2.618955799562722e-06, "epoch": 17.354469854469855, "percentage": 86.77, "elapsed_time": "1:20:25", "remaining_time": "0:12:15", "throughput": 1319.11, "total_tokens": 6365288}
|
| 6696 |
+
{"current_steps": 33395, "total_steps": 38480, "loss": 0.1355, "lr": 2.613905574201275e-06, "epoch": 17.35706860706861, "percentage": 86.79, "elapsed_time": "1:20:26", "remaining_time": "0:12:14", "throughput": 1319.13, "total_tokens": 6366280}
|
| 6697 |
+
{"current_steps": 33400, "total_steps": 38480, "loss": 0.072, "lr": 2.6088599541929104e-06, "epoch": 17.35966735966736, "percentage": 86.8, "elapsed_time": "1:20:26", "remaining_time": "0:12:14", "throughput": 1319.14, "total_tokens": 6367240}
|
| 6698 |
+
{"current_steps": 33405, "total_steps": 38480, "loss": 0.2421, "lr": 2.603818940575628e-06, "epoch": 17.362266112266113, "percentage": 86.81, "elapsed_time": "1:20:27", "remaining_time": "0:12:13", "throughput": 1319.14, "total_tokens": 6368168}
|
| 6699 |
+
{"current_steps": 33410, "total_steps": 38480, "loss": 0.085, "lr": 2.5987825343864956e-06, "epoch": 17.364864864864863, "percentage": 86.82, "elapsed_time": "1:20:28", "remaining_time": "0:12:12", "throughput": 1319.15, "total_tokens": 6369128}
|
| 6700 |
+
{"current_steps": 33415, "total_steps": 38480, "loss": 0.1869, "lr": 2.5937507366616154e-06, "epoch": 17.367463617463617, "percentage": 86.84, "elapsed_time": "1:20:28", "remaining_time": "0:12:11", "throughput": 1319.15, "total_tokens": 6370056}
|
| 6701 |
+
{"current_steps": 33420, "total_steps": 38480, "loss": 0.0808, "lr": 2.5887235484361506e-06, "epoch": 17.37006237006237, "percentage": 86.85, "elapsed_time": "1:20:29", "remaining_time": "0:12:11", "throughput": 1319.17, "total_tokens": 6371080}
|
| 6702 |
+
{"current_steps": 33425, "total_steps": 38480, "loss": 0.1231, "lr": 2.5837009707443042e-06, "epoch": 17.37266112266112, "percentage": 86.86, "elapsed_time": "1:20:30", "remaining_time": "0:12:10", "throughput": 1319.18, "total_tokens": 6372072}
|
| 6703 |
+
{"current_steps": 33430, "total_steps": 38480, "loss": 0.1329, "lr": 2.5786830046193587e-06, "epoch": 17.375259875259875, "percentage": 86.88, "elapsed_time": "1:20:31", "remaining_time": "0:12:09", "throughput": 1319.2, "total_tokens": 6373064}
|
| 6704 |
+
{"current_steps": 33435, "total_steps": 38480, "loss": 0.1675, "lr": 2.573669651093616e-06, "epoch": 17.37785862785863, "percentage": 86.89, "elapsed_time": "1:20:31", "remaining_time": "0:12:09", "throughput": 1319.2, "total_tokens": 6373992}
|
| 6705 |
+
{"current_steps": 33440, "total_steps": 38480, "loss": 0.1652, "lr": 2.5686609111984368e-06, "epoch": 17.38045738045738, "percentage": 86.9, "elapsed_time": "1:20:32", "remaining_time": "0:12:08", "throughput": 1319.21, "total_tokens": 6374952}
|
| 6706 |
+
{"current_steps": 33445, "total_steps": 38480, "loss": 0.1347, "lr": 2.5636567859642525e-06, "epoch": 17.383056133056133, "percentage": 86.92, "elapsed_time": "1:20:33", "remaining_time": "0:12:07", "throughput": 1319.22, "total_tokens": 6375944}
|
| 6707 |
+
{"current_steps": 33450, "total_steps": 38480, "loss": 0.2083, "lr": 2.558657276420526e-06, "epoch": 17.385654885654887, "percentage": 86.93, "elapsed_time": "1:20:33", "remaining_time": "0:12:06", "throughput": 1319.22, "total_tokens": 6376872}
|
| 6708 |
+
{"current_steps": 33455, "total_steps": 38480, "loss": 0.09, "lr": 2.5536623835957727e-06, "epoch": 17.388253638253637, "percentage": 86.94, "elapsed_time": "1:20:34", "remaining_time": "0:12:06", "throughput": 1319.24, "total_tokens": 6377864}
|
| 6709 |
+
{"current_steps": 33460, "total_steps": 38480, "loss": 0.1319, "lr": 2.548672108517558e-06, "epoch": 17.39085239085239, "percentage": 86.95, "elapsed_time": "1:20:35", "remaining_time": "0:12:05", "throughput": 1319.23, "total_tokens": 6378760}
|
| 6710 |
+
{"current_steps": 33465, "total_steps": 38480, "loss": 0.1061, "lr": 2.54368645221251e-06, "epoch": 17.393451143451145, "percentage": 86.97, "elapsed_time": "1:20:35", "remaining_time": "0:12:04", "throughput": 1319.23, "total_tokens": 6379656}
|
| 6711 |
+
{"current_steps": 33470, "total_steps": 38480, "loss": 0.1478, "lr": 2.538705415706291e-06, "epoch": 17.396049896049895, "percentage": 86.98, "elapsed_time": "1:20:36", "remaining_time": "0:12:03", "throughput": 1319.23, "total_tokens": 6380584}
|
| 6712 |
+
{"current_steps": 33475, "total_steps": 38480, "loss": 0.2079, "lr": 2.533729000023616e-06, "epoch": 17.39864864864865, "percentage": 86.99, "elapsed_time": "1:20:37", "remaining_time": "0:12:03", "throughput": 1319.23, "total_tokens": 6381512}
|
| 6713 |
+
{"current_steps": 33480, "total_steps": 38480, "loss": 0.1044, "lr": 2.5287572061882618e-06, "epoch": 17.401247401247403, "percentage": 87.01, "elapsed_time": "1:20:38", "remaining_time": "0:12:02", "throughput": 1319.24, "total_tokens": 6382504}
|
| 6714 |
+
{"current_steps": 33485, "total_steps": 38480, "loss": 0.2152, "lr": 2.5237900352230393e-06, "epoch": 17.403846153846153, "percentage": 87.02, "elapsed_time": "1:20:38", "remaining_time": "0:12:01", "throughput": 1319.26, "total_tokens": 6383496}
|
| 6715 |
+
{"current_steps": 33490, "total_steps": 38480, "loss": 0.1351, "lr": 2.5188274881498136e-06, "epoch": 17.406444906444907, "percentage": 87.03, "elapsed_time": "1:20:39", "remaining_time": "0:12:01", "throughput": 1319.26, "total_tokens": 6384424}
|
| 6716 |
+
{"current_steps": 33495, "total_steps": 38480, "loss": 0.1929, "lr": 2.513869565989499e-06, "epoch": 17.409043659043657, "percentage": 87.05, "elapsed_time": "1:20:40", "remaining_time": "0:12:00", "throughput": 1319.27, "total_tokens": 6385384}
|
| 6717 |
+
{"current_steps": 33500, "total_steps": 38480, "loss": 0.1064, "lr": 2.508916269762057e-06, "epoch": 17.41164241164241, "percentage": 87.06, "elapsed_time": "1:20:40", "remaining_time": "0:11:59", "throughput": 1319.27, "total_tokens": 6386312}
|
| 6718 |
+
{"current_steps": 33505, "total_steps": 38480, "loss": 0.0937, "lr": 2.503967600486504e-06, "epoch": 17.414241164241165, "percentage": 87.07, "elapsed_time": "1:20:41", "remaining_time": "0:11:58", "throughput": 1319.27, "total_tokens": 6387240}
|
| 6719 |
+
{"current_steps": 33510, "total_steps": 38480, "loss": 0.2095, "lr": 2.499023559180888e-06, "epoch": 17.416839916839916, "percentage": 87.08, "elapsed_time": "1:20:42", "remaining_time": "0:11:58", "throughput": 1319.26, "total_tokens": 6388136}
|
| 6720 |
+
{"current_steps": 33515, "total_steps": 38480, "loss": 0.1336, "lr": 2.4940841468623272e-06, "epoch": 17.41943866943867, "percentage": 87.1, "elapsed_time": "1:20:42", "remaining_time": "0:11:57", "throughput": 1319.25, "total_tokens": 6389000}
|
| 6721 |
+
{"current_steps": 33520, "total_steps": 38480, "loss": 0.0941, "lr": 2.489149364546975e-06, "epoch": 17.422037422037423, "percentage": 87.11, "elapsed_time": "1:20:43", "remaining_time": "0:11:56", "throughput": 1319.26, "total_tokens": 6389960}
|
| 6722 |
+
{"current_steps": 33525, "total_steps": 38480, "loss": 0.0919, "lr": 2.48421921325003e-06, "epoch": 17.424636174636174, "percentage": 87.12, "elapsed_time": "1:20:44", "remaining_time": "0:11:55", "throughput": 1319.28, "total_tokens": 6390952}
|
| 6723 |
+
{"current_steps": 33530, "total_steps": 38480, "loss": 0.1158, "lr": 2.47929369398574e-06, "epoch": 17.427234927234927, "percentage": 87.14, "elapsed_time": "1:20:44", "remaining_time": "0:11:55", "throughput": 1319.29, "total_tokens": 6391912}
|
| 6724 |
+
{"current_steps": 33535, "total_steps": 38480, "loss": 0.0853, "lr": 2.474372807767408e-06, "epoch": 17.42983367983368, "percentage": 87.15, "elapsed_time": "1:20:45", "remaining_time": "0:11:54", "throughput": 1319.28, "total_tokens": 6392808}
|
| 6725 |
+
{"current_steps": 33540, "total_steps": 38480, "loss": 0.0404, "lr": 2.4694565556073746e-06, "epoch": 17.43243243243243, "percentage": 87.16, "elapsed_time": "1:20:46", "remaining_time": "0:11:53", "throughput": 1319.27, "total_tokens": 6393704}
|
| 6726 |
+
{"current_steps": 33545, "total_steps": 38480, "loss": 0.1636, "lr": 2.46454493851703e-06, "epoch": 17.435031185031185, "percentage": 87.18, "elapsed_time": "1:20:47", "remaining_time": "0:11:53", "throughput": 1319.29, "total_tokens": 6394696}
|
| 6727 |
+
{"current_steps": 33550, "total_steps": 38480, "loss": 0.0661, "lr": 2.459637957506808e-06, "epoch": 17.43762993762994, "percentage": 87.19, "elapsed_time": "1:20:47", "remaining_time": "0:11:52", "throughput": 1319.29, "total_tokens": 6395592}
|
| 6728 |
+
{"current_steps": 33555, "total_steps": 38480, "loss": 0.0871, "lr": 2.4547356135861954e-06, "epoch": 17.44022869022869, "percentage": 87.2, "elapsed_time": "1:20:48", "remaining_time": "0:11:51", "throughput": 1319.29, "total_tokens": 6396520}
|
| 6729 |
+
{"current_steps": 33560, "total_steps": 38480, "loss": 0.103, "lr": 2.4498379077637275e-06, "epoch": 17.442827442827443, "percentage": 87.21, "elapsed_time": "1:20:49", "remaining_time": "0:11:50", "throughput": 1319.31, "total_tokens": 6397544}
|
| 6730 |
+
{"current_steps": 33565, "total_steps": 38480, "loss": 0.2517, "lr": 2.4449448410469616e-06, "epoch": 17.445426195426194, "percentage": 87.23, "elapsed_time": "1:20:49", "remaining_time": "0:11:50", "throughput": 1319.32, "total_tokens": 6398504}
|
| 6731 |
+
{"current_steps": 33570, "total_steps": 38480, "loss": 0.1719, "lr": 2.440056414442532e-06, "epoch": 17.448024948024948, "percentage": 87.24, "elapsed_time": "1:20:50", "remaining_time": "0:11:49", "throughput": 1319.33, "total_tokens": 6399464}
|
| 6732 |
+
{"current_steps": 33575, "total_steps": 38480, "loss": 0.0506, "lr": 2.4351726289561035e-06, "epoch": 17.4506237006237, "percentage": 87.25, "elapsed_time": "1:20:51", "remaining_time": "0:11:48", "throughput": 1319.33, "total_tokens": 6400392}
|
| 6733 |
+
{"current_steps": 33580, "total_steps": 38480, "loss": 0.0402, "lr": 2.4302934855923804e-06, "epoch": 17.453222453222452, "percentage": 87.27, "elapsed_time": "1:20:51", "remaining_time": "0:11:47", "throughput": 1319.34, "total_tokens": 6401384}
|
| 6734 |
+
{"current_steps": 33585, "total_steps": 38480, "loss": 0.0905, "lr": 2.4254189853551214e-06, "epoch": 17.455821205821206, "percentage": 87.28, "elapsed_time": "1:20:52", "remaining_time": "0:11:47", "throughput": 1319.35, "total_tokens": 6402344}
|
| 6735 |
+
{"current_steps": 33590, "total_steps": 38480, "loss": 0.0804, "lr": 2.420549129247132e-06, "epoch": 17.45841995841996, "percentage": 87.29, "elapsed_time": "1:20:53", "remaining_time": "0:11:46", "throughput": 1319.34, "total_tokens": 6403208}
|
| 6736 |
+
{"current_steps": 33595, "total_steps": 38480, "loss": 0.09, "lr": 2.4156839182702534e-06, "epoch": 17.46101871101871, "percentage": 87.31, "elapsed_time": "1:20:54", "remaining_time": "0:11:45", "throughput": 1319.35, "total_tokens": 6404168}
|
| 6737 |
+
{"current_steps": 33600, "total_steps": 38480, "loss": 0.1519, "lr": 2.4108233534253734e-06, "epoch": 17.463617463617464, "percentage": 87.32, "elapsed_time": "1:20:54", "remaining_time": "0:11:45", "throughput": 1319.39, "total_tokens": 6405288}
|
| 6738 |
+
{"current_steps": 33605, "total_steps": 38480, "loss": 0.216, "lr": 2.4059674357124335e-06, "epoch": 17.466216216216218, "percentage": 87.33, "elapsed_time": "1:20:55", "remaining_time": "0:11:44", "throughput": 1319.39, "total_tokens": 6406216}
|
| 6739 |
+
{"current_steps": 33610, "total_steps": 38480, "loss": 0.1062, "lr": 2.4011161661304072e-06, "epoch": 17.468814968814968, "percentage": 87.34, "elapsed_time": "1:20:56", "remaining_time": "0:11:43", "throughput": 1319.39, "total_tokens": 6407176}
|
| 6740 |
+
{"current_steps": 33615, "total_steps": 38480, "loss": 0.096, "lr": 2.3962695456773153e-06, "epoch": 17.47141372141372, "percentage": 87.36, "elapsed_time": "1:20:56", "remaining_time": "0:11:42", "throughput": 1319.4, "total_tokens": 6408136}
|
| 6741 |
+
{"current_steps": 33620, "total_steps": 38480, "loss": 0.0812, "lr": 2.391427575350219e-06, "epoch": 17.474012474012476, "percentage": 87.37, "elapsed_time": "1:20:57", "remaining_time": "0:11:42", "throughput": 1319.41, "total_tokens": 6409096}
|
| 6742 |
+
{"current_steps": 33625, "total_steps": 38480, "loss": 0.2186, "lr": 2.3865902561452437e-06, "epoch": 17.476611226611226, "percentage": 87.38, "elapsed_time": "1:20:58", "remaining_time": "0:11:41", "throughput": 1319.42, "total_tokens": 6410056}
|
| 6743 |
+
{"current_steps": 33630, "total_steps": 38480, "loss": 0.1674, "lr": 2.3817575890575233e-06, "epoch": 17.47920997920998, "percentage": 87.4, "elapsed_time": "1:20:58", "remaining_time": "0:11:40", "throughput": 1319.43, "total_tokens": 6411048}
|
| 6744 |
+
{"current_steps": 33635, "total_steps": 38480, "loss": 0.1668, "lr": 2.376929575081252e-06, "epoch": 17.48180873180873, "percentage": 87.41, "elapsed_time": "1:20:59", "remaining_time": "0:11:40", "throughput": 1319.44, "total_tokens": 6412008}
|
| 6745 |
+
{"current_steps": 33640, "total_steps": 38480, "loss": 0.0569, "lr": 2.3721062152096825e-06, "epoch": 17.484407484407484, "percentage": 87.42, "elapsed_time": "1:21:00", "remaining_time": "0:11:39", "throughput": 1319.44, "total_tokens": 6412936}
|
| 6746 |
+
{"current_steps": 33645, "total_steps": 38480, "loss": 0.1625, "lr": 2.3672875104350844e-06, "epoch": 17.487006237006238, "percentage": 87.44, "elapsed_time": "1:21:01", "remaining_time": "0:11:38", "throughput": 1319.46, "total_tokens": 6413928}
|
| 6747 |
+
{"current_steps": 33650, "total_steps": 38480, "loss": 0.0588, "lr": 2.3624734617487848e-06, "epoch": 17.489604989604988, "percentage": 87.45, "elapsed_time": "1:21:01", "remaining_time": "0:11:37", "throughput": 1319.46, "total_tokens": 6414856}
|
| 6748 |
+
{"current_steps": 33655, "total_steps": 38480, "loss": 0.1275, "lr": 2.3576640701411383e-06, "epoch": 17.492203742203742, "percentage": 87.46, "elapsed_time": "1:21:02", "remaining_time": "0:11:37", "throughput": 1319.48, "total_tokens": 6415880}
|
| 6749 |
+
{"current_steps": 33660, "total_steps": 38480, "loss": 0.1164, "lr": 2.352859336601565e-06, "epoch": 17.494802494802496, "percentage": 87.47, "elapsed_time": "1:21:03", "remaining_time": "0:11:36", "throughput": 1319.49, "total_tokens": 6416840}
|
| 6750 |
+
{"current_steps": 33665, "total_steps": 38480, "loss": 0.0898, "lr": 2.348059262118507e-06, "epoch": 17.497401247401246, "percentage": 87.49, "elapsed_time": "1:21:03", "remaining_time": "0:11:35", "throughput": 1319.5, "total_tokens": 6417800}
|
| 6751 |
+
{"current_steps": 33670, "total_steps": 38480, "loss": 0.0488, "lr": 2.343263847679453e-06, "epoch": 17.5, "percentage": 87.5, "elapsed_time": "1:21:04", "remaining_time": "0:11:34", "throughput": 1319.5, "total_tokens": 6418696}
|
| 6752 |
+
{"current_steps": 33675, "total_steps": 38480, "loss": 0.0396, "lr": 2.338473094270929e-06, "epoch": 17.502598752598754, "percentage": 87.51, "elapsed_time": "1:21:05", "remaining_time": "0:11:34", "throughput": 1319.49, "total_tokens": 6419592}
|
| 6753 |
+
{"current_steps": 33680, "total_steps": 38480, "loss": 0.0831, "lr": 2.333687002878521e-06, "epoch": 17.505197505197504, "percentage": 87.53, "elapsed_time": "1:21:05", "remaining_time": "0:11:33", "throughput": 1319.5, "total_tokens": 6420584}
|
| 6754 |
+
{"current_steps": 33685, "total_steps": 38480, "loss": 0.1663, "lr": 2.328905574486831e-06, "epoch": 17.507796257796258, "percentage": 87.54, "elapsed_time": "1:21:06", "remaining_time": "0:11:32", "throughput": 1319.52, "total_tokens": 6421576}
|
| 6755 |
+
{"current_steps": 33690, "total_steps": 38480, "loss": 0.2633, "lr": 2.3241288100795178e-06, "epoch": 17.510395010395012, "percentage": 87.55, "elapsed_time": "1:21:07", "remaining_time": "0:11:32", "throughput": 1319.52, "total_tokens": 6422504}
|
| 6756 |
+
{"current_steps": 33695, "total_steps": 38480, "loss": 0.1577, "lr": 2.3193567106392728e-06, "epoch": 17.512993762993762, "percentage": 87.56, "elapsed_time": "1:21:07", "remaining_time": "0:11:31", "throughput": 1319.53, "total_tokens": 6423464}
|
| 6757 |
+
{"current_steps": 33700, "total_steps": 38480, "loss": 0.1435, "lr": 2.31458927714783e-06, "epoch": 17.515592515592516, "percentage": 87.58, "elapsed_time": "1:21:08", "remaining_time": "0:11:30", "throughput": 1319.53, "total_tokens": 6424360}
|
| 6758 |
+
{"current_steps": 33705, "total_steps": 38480, "loss": 0.0858, "lr": 2.309826510585966e-06, "epoch": 17.518191268191266, "percentage": 87.59, "elapsed_time": "1:21:09", "remaining_time": "0:11:29", "throughput": 1319.53, "total_tokens": 6425288}
|
| 6759 |
+
{"current_steps": 33710, "total_steps": 38480, "loss": 0.1473, "lr": 2.305068411933492e-06, "epoch": 17.52079002079002, "percentage": 87.6, "elapsed_time": "1:21:10", "remaining_time": "0:11:29", "throughput": 1319.54, "total_tokens": 6426280}
|
| 6760 |
+
{"current_steps": 33715, "total_steps": 38480, "loss": 0.101, "lr": 2.3003149821692687e-06, "epoch": 17.523388773388774, "percentage": 87.62, "elapsed_time": "1:21:10", "remaining_time": "0:11:28", "throughput": 1319.56, "total_tokens": 6427272}
|
| 6761 |
+
{"current_steps": 33720, "total_steps": 38480, "loss": 0.1795, "lr": 2.295566222271189e-06, "epoch": 17.525987525987524, "percentage": 87.63, "elapsed_time": "1:21:11", "remaining_time": "0:11:27", "throughput": 1319.57, "total_tokens": 6428232}
|
| 6762 |
+
{"current_steps": 33725, "total_steps": 38480, "loss": 0.0402, "lr": 2.290822133216175e-06, "epoch": 17.52858627858628, "percentage": 87.64, "elapsed_time": "1:21:12", "remaining_time": "0:11:26", "throughput": 1319.57, "total_tokens": 6429192}
|
| 6763 |
+
{"current_steps": 33730, "total_steps": 38480, "loss": 0.0848, "lr": 2.2860827159802146e-06, "epoch": 17.531185031185032, "percentage": 87.66, "elapsed_time": "1:21:12", "remaining_time": "0:11:26", "throughput": 1319.59, "total_tokens": 6430184}
|
| 6764 |
+
{"current_steps": 33735, "total_steps": 38480, "loss": 0.1429, "lr": 2.281347971538311e-06, "epoch": 17.533783783783782, "percentage": 87.67, "elapsed_time": "1:21:13", "remaining_time": "0:11:25", "throughput": 1319.59, "total_tokens": 6431112}
|
| 6765 |
+
{"current_steps": 33740, "total_steps": 38480, "loss": 0.0966, "lr": 2.276617900864514e-06, "epoch": 17.536382536382536, "percentage": 87.68, "elapsed_time": "1:21:14", "remaining_time": "0:11:24", "throughput": 1319.58, "total_tokens": 6432008}
|
| 6766 |
+
{"current_steps": 33745, "total_steps": 38480, "loss": 0.0386, "lr": 2.271892504931905e-06, "epoch": 17.53898128898129, "percentage": 87.69, "elapsed_time": "1:21:14", "remaining_time": "0:11:24", "throughput": 1319.59, "total_tokens": 6432968}
|
| 6767 |
+
{"current_steps": 33750, "total_steps": 38480, "loss": 0.1087, "lr": 2.267171784712627e-06, "epoch": 17.54158004158004, "percentage": 87.71, "elapsed_time": "1:21:15", "remaining_time": "0:11:23", "throughput": 1319.6, "total_tokens": 6433928}
|
| 6768 |
+
{"current_steps": 33755, "total_steps": 38480, "loss": 0.264, "lr": 2.262455741177838e-06, "epoch": 17.544178794178794, "percentage": 87.72, "elapsed_time": "1:21:16", "remaining_time": "0:11:22", "throughput": 1319.62, "total_tokens": 6434952}
|
| 6769 |
+
{"current_steps": 33760, "total_steps": 38480, "loss": 0.1126, "lr": 2.2577443752977297e-06, "epoch": 17.546777546777548, "percentage": 87.73, "elapsed_time": "1:21:17", "remaining_time": "0:11:21", "throughput": 1319.64, "total_tokens": 6435944}
|
| 6770 |
+
{"current_steps": 33765, "total_steps": 38480, "loss": 0.153, "lr": 2.253037688041554e-06, "epoch": 17.5493762993763, "percentage": 87.75, "elapsed_time": "1:21:17", "remaining_time": "0:11:21", "throughput": 1319.65, "total_tokens": 6436936}
|
| 6771 |
+
{"current_steps": 33770, "total_steps": 38480, "loss": 0.1818, "lr": 2.248335680377586e-06, "epoch": 17.551975051975052, "percentage": 87.76, "elapsed_time": "1:21:18", "remaining_time": "0:11:20", "throughput": 1319.66, "total_tokens": 6437896}
|
| 6772 |
+
{"current_steps": 33775, "total_steps": 38480, "loss": 0.1119, "lr": 2.243638353273139e-06, "epoch": 17.554573804573806, "percentage": 87.77, "elapsed_time": "1:21:19", "remaining_time": "0:11:19", "throughput": 1319.69, "total_tokens": 6438984}
|
| 6773 |
+
{"current_steps": 33780, "total_steps": 38480, "loss": 0.1278, "lr": 2.238945707694562e-06, "epoch": 17.557172557172557, "percentage": 87.79, "elapsed_time": "1:21:19", "remaining_time": "0:11:18", "throughput": 1319.7, "total_tokens": 6439944}
|
| 6774 |
+
{"current_steps": 33785, "total_steps": 38480, "loss": 0.1278, "lr": 2.2342577446072544e-06, "epoch": 17.55977130977131, "percentage": 87.8, "elapsed_time": "1:21:20", "remaining_time": "0:11:18", "throughput": 1319.72, "total_tokens": 6440968}
|
| 6775 |
+
{"current_steps": 33790, "total_steps": 38480, "loss": 0.111, "lr": 2.229574464975634e-06, "epoch": 17.56237006237006, "percentage": 87.81, "elapsed_time": "1:21:21", "remaining_time": "0:11:17", "throughput": 1319.73, "total_tokens": 6441960}
|
| 6776 |
+
{"current_steps": 33795, "total_steps": 38480, "loss": 0.1066, "lr": 2.2248958697631634e-06, "epoch": 17.564968814968815, "percentage": 87.82, "elapsed_time": "1:21:21", "remaining_time": "0:11:16", "throughput": 1319.71, "total_tokens": 6442792}
|
| 6777 |
+
{"current_steps": 33800, "total_steps": 38480, "loss": 0.2212, "lr": 2.2202219599323394e-06, "epoch": 17.56756756756757, "percentage": 87.84, "elapsed_time": "1:21:22", "remaining_time": "0:11:16", "throughput": 1319.71, "total_tokens": 6443688}
|
| 6778 |
+
{"current_steps": 33805, "total_steps": 38480, "loss": 0.105, "lr": 2.2155527364447064e-06, "epoch": 17.57016632016632, "percentage": 87.85, "elapsed_time": "1:21:23", "remaining_time": "0:11:15", "throughput": 1319.73, "total_tokens": 6444712}
|
| 6779 |
+
{"current_steps": 33810, "total_steps": 38480, "loss": 0.1033, "lr": 2.2108882002608246e-06, "epoch": 17.572765072765073, "percentage": 87.86, "elapsed_time": "1:21:24", "remaining_time": "0:11:14", "throughput": 1319.74, "total_tokens": 6445704}
|
| 6780 |
+
{"current_steps": 33815, "total_steps": 38480, "loss": 0.1036, "lr": 2.206228352340306e-06, "epoch": 17.575363825363826, "percentage": 87.88, "elapsed_time": "1:21:24", "remaining_time": "0:11:13", "throughput": 1319.75, "total_tokens": 6446664}
|
| 6781 |
+
{"current_steps": 33820, "total_steps": 38480, "loss": 0.0883, "lr": 2.20157319364179e-06, "epoch": 17.577962577962577, "percentage": 87.89, "elapsed_time": "1:21:25", "remaining_time": "0:11:13", "throughput": 1319.75, "total_tokens": 6447592}
|
| 6782 |
+
{"current_steps": 33825, "total_steps": 38480, "loss": 0.2043, "lr": 2.1969227251229547e-06, "epoch": 17.58056133056133, "percentage": 87.9, "elapsed_time": "1:21:26", "remaining_time": "0:11:12", "throughput": 1319.75, "total_tokens": 6448520}
|
| 6783 |
+
{"current_steps": 33830, "total_steps": 38480, "loss": 0.1, "lr": 2.1922769477405097e-06, "epoch": 17.583160083160084, "percentage": 87.92, "elapsed_time": "1:21:26", "remaining_time": "0:11:11", "throughput": 1319.75, "total_tokens": 6449448}
|
| 6784 |
+
{"current_steps": 33835, "total_steps": 38480, "loss": 0.0768, "lr": 2.187635862450199e-06, "epoch": 17.585758835758835, "percentage": 87.93, "elapsed_time": "1:21:27", "remaining_time": "0:11:10", "throughput": 1319.76, "total_tokens": 6450408}
|
| 6785 |
+
{"current_steps": 33840, "total_steps": 38480, "loss": 0.1102, "lr": 2.1829994702068146e-06, "epoch": 17.58835758835759, "percentage": 87.94, "elapsed_time": "1:21:28", "remaining_time": "0:11:10", "throughput": 1319.75, "total_tokens": 6451304}
|
| 6786 |
+
{"current_steps": 33845, "total_steps": 38480, "loss": 0.1633, "lr": 2.178367771964168e-06, "epoch": 17.590956340956343, "percentage": 87.95, "elapsed_time": "1:21:28", "remaining_time": "0:11:09", "throughput": 1319.76, "total_tokens": 6452264}
|
| 6787 |
+
{"current_steps": 33850, "total_steps": 38480, "loss": 0.1786, "lr": 2.173740768675106e-06, "epoch": 17.593555093555093, "percentage": 87.97, "elapsed_time": "1:21:29", "remaining_time": "0:11:08", "throughput": 1319.78, "total_tokens": 6453256}
|
| 6788 |
+
{"current_steps": 33855, "total_steps": 38480, "loss": 0.1945, "lr": 2.1691184612915196e-06, "epoch": 17.596153846153847, "percentage": 87.98, "elapsed_time": "1:21:30", "remaining_time": "0:11:08", "throughput": 1319.77, "total_tokens": 6454152}
|
| 6789 |
+
{"current_steps": 33860, "total_steps": 38480, "loss": 0.2106, "lr": 2.1645008507643254e-06, "epoch": 17.598752598752597, "percentage": 87.99, "elapsed_time": "1:21:31", "remaining_time": "0:11:07", "throughput": 1319.77, "total_tokens": 6455048}
|
| 6790 |
+
{"current_steps": 33865, "total_steps": 38480, "loss": 0.1387, "lr": 2.1598879380434756e-06, "epoch": 17.60135135135135, "percentage": 88.01, "elapsed_time": "1:21:31", "remaining_time": "0:11:06", "throughput": 1319.77, "total_tokens": 6456008}
|
| 6791 |
+
{"current_steps": 33870, "total_steps": 38480, "loss": 0.1421, "lr": 2.155279724077952e-06, "epoch": 17.603950103950105, "percentage": 88.02, "elapsed_time": "1:21:32", "remaining_time": "0:11:05", "throughput": 1319.77, "total_tokens": 6456904}
|
| 6792 |
+
{"current_steps": 33875, "total_steps": 38480, "loss": 0.1249, "lr": 2.150676209815783e-06, "epoch": 17.606548856548855, "percentage": 88.03, "elapsed_time": "1:21:33", "remaining_time": "0:11:05", "throughput": 1319.77, "total_tokens": 6457832}
|
| 6793 |
+
{"current_steps": 33880, "total_steps": 38480, "loss": 0.0816, "lr": 2.1460773962040158e-06, "epoch": 17.60914760914761, "percentage": 88.05, "elapsed_time": "1:21:33", "remaining_time": "0:11:04", "throughput": 1319.77, "total_tokens": 6458728}
|
| 6794 |
+
{"current_steps": 33885, "total_steps": 38480, "loss": 0.0863, "lr": 2.141483284188736e-06, "epoch": 17.611746361746363, "percentage": 88.06, "elapsed_time": "1:21:34", "remaining_time": "0:11:03", "throughput": 1319.77, "total_tokens": 6459656}
|
| 6795 |
+
{"current_steps": 33890, "total_steps": 38480, "loss": 0.1894, "lr": 2.136893874715065e-06, "epoch": 17.614345114345113, "percentage": 88.07, "elapsed_time": "1:21:35", "remaining_time": "0:11:03", "throughput": 1319.78, "total_tokens": 6460648}
|
| 6796 |
+
{"current_steps": 33895, "total_steps": 38480, "loss": 0.1695, "lr": 2.132309168727148e-06, "epoch": 17.616943866943867, "percentage": 88.08, "elapsed_time": "1:21:35", "remaining_time": "0:11:02", "throughput": 1319.8, "total_tokens": 6461640}
|
| 6797 |
+
{"current_steps": 33900, "total_steps": 38480, "loss": 0.0929, "lr": 2.1277291671681716e-06, "epoch": 17.61954261954262, "percentage": 88.1, "elapsed_time": "1:21:36", "remaining_time": "0:11:01", "throughput": 1319.81, "total_tokens": 6462600}
|
| 6798 |
+
{"current_steps": 33905, "total_steps": 38480, "loss": 0.1298, "lr": 2.1231538709803487e-06, "epoch": 17.62214137214137, "percentage": 88.11, "elapsed_time": "1:21:37", "remaining_time": "0:11:00", "throughput": 1319.81, "total_tokens": 6463560}
|
| 6799 |
+
{"current_steps": 33910, "total_steps": 38480, "loss": 0.1643, "lr": 2.1185832811049327e-06, "epoch": 17.624740124740125, "percentage": 88.12, "elapsed_time": "1:21:38", "remaining_time": "0:11:00", "throughput": 1319.81, "total_tokens": 6464456}
|
| 6800 |
+
{"current_steps": 33915, "total_steps": 38480, "loss": 0.0829, "lr": 2.1140173984821987e-06, "epoch": 17.62733887733888, "percentage": 88.14, "elapsed_time": "1:21:38", "remaining_time": "0:10:59", "throughput": 1319.83, "total_tokens": 6465448}
|
| 6801 |
+
{"current_steps": 33920, "total_steps": 38480, "loss": 0.1057, "lr": 2.1094562240514563e-06, "epoch": 17.62993762993763, "percentage": 88.15, "elapsed_time": "1:21:39", "remaining_time": "0:10:58", "throughput": 1319.85, "total_tokens": 6466504}
|
| 6802 |
+
{"current_steps": 33925, "total_steps": 38480, "loss": 0.1211, "lr": 2.1048997587510473e-06, "epoch": 17.632536382536383, "percentage": 88.16, "elapsed_time": "1:21:40", "remaining_time": "0:10:57", "throughput": 1319.86, "total_tokens": 6467432}
|
| 6803 |
+
{"current_steps": 33930, "total_steps": 38480, "loss": 0.2286, "lr": 2.1003480035183513e-06, "epoch": 17.635135135135137, "percentage": 88.18, "elapsed_time": "1:21:40", "remaining_time": "0:10:57", "throughput": 1319.88, "total_tokens": 6468456}
|
| 6804 |
+
{"current_steps": 33935, "total_steps": 38480, "loss": 0.1041, "lr": 2.0958009592897705e-06, "epoch": 17.637733887733887, "percentage": 88.19, "elapsed_time": "1:21:41", "remaining_time": "0:10:56", "throughput": 1319.89, "total_tokens": 6469448}
|
| 6805 |
+
{"current_steps": 33940, "total_steps": 38480, "loss": 0.1874, "lr": 2.0912586270007388e-06, "epoch": 17.64033264033264, "percentage": 88.2, "elapsed_time": "1:21:42", "remaining_time": "0:10:55", "throughput": 1319.89, "total_tokens": 6470376}
|
| 6806 |
+
{"current_steps": 33945, "total_steps": 38480, "loss": 0.1768, "lr": 2.086721007585715e-06, "epoch": 17.64293139293139, "percentage": 88.21, "elapsed_time": "1:21:42", "remaining_time": "0:10:55", "throughput": 1319.91, "total_tokens": 6471368}
|
| 6807 |
+
{"current_steps": 33950, "total_steps": 38480, "loss": 0.1092, "lr": 2.0821881019782134e-06, "epoch": 17.645530145530145, "percentage": 88.23, "elapsed_time": "1:21:43", "remaining_time": "0:10:54", "throughput": 1319.91, "total_tokens": 6472296}
|
| 6808 |
+
{"current_steps": 33955, "total_steps": 38480, "loss": 0.1234, "lr": 2.0776599111107466e-06, "epoch": 17.6481288981289, "percentage": 88.24, "elapsed_time": "1:21:44", "remaining_time": "0:10:53", "throughput": 1319.93, "total_tokens": 6473320}
|
| 6809 |
+
{"current_steps": 33960, "total_steps": 38480, "loss": 0.068, "lr": 2.0731364359148687e-06, "epoch": 17.65072765072765, "percentage": 88.25, "elapsed_time": "1:21:45", "remaining_time": "0:10:52", "throughput": 1319.94, "total_tokens": 6474312}
|
| 6810 |
+
{"current_steps": 33965, "total_steps": 38480, "loss": 0.0687, "lr": 2.0686176773211776e-06, "epoch": 17.653326403326403, "percentage": 88.27, "elapsed_time": "1:21:45", "remaining_time": "0:10:52", "throughput": 1319.94, "total_tokens": 6475240}
|
| 6811 |
+
{"current_steps": 33970, "total_steps": 38480, "loss": 0.1341, "lr": 2.0641036362592818e-06, "epoch": 17.655925155925157, "percentage": 88.28, "elapsed_time": "1:21:46", "remaining_time": "0:10:51", "throughput": 1319.96, "total_tokens": 6476232}
|
| 6812 |
+
{"current_steps": 33975, "total_steps": 38480, "loss": 0.046, "lr": 2.0595943136578284e-06, "epoch": 17.658523908523907, "percentage": 88.29, "elapsed_time": "1:21:47", "remaining_time": "0:10:50", "throughput": 1319.95, "total_tokens": 6477128}
|
| 6813 |
+
{"current_steps": 33980, "total_steps": 38480, "loss": 0.0937, "lr": 2.055089710444497e-06, "epoch": 17.66112266112266, "percentage": 88.31, "elapsed_time": "1:21:47", "remaining_time": "0:10:49", "throughput": 1319.96, "total_tokens": 6478120}
|
| 6814 |
+
{"current_steps": 33985, "total_steps": 38480, "loss": 0.068, "lr": 2.05058982754599e-06, "epoch": 17.663721413721415, "percentage": 88.32, "elapsed_time": "1:21:48", "remaining_time": "0:10:49", "throughput": 1319.96, "total_tokens": 6479016}
|
| 6815 |
+
{"current_steps": 33990, "total_steps": 38480, "loss": 0.156, "lr": 2.0460946658880376e-06, "epoch": 17.666320166320165, "percentage": 88.33, "elapsed_time": "1:21:49", "remaining_time": "0:10:48", "throughput": 1319.98, "total_tokens": 6480040}
|
| 6816 |
+
{"current_steps": 33995, "total_steps": 38480, "loss": 0.0885, "lr": 2.0416042263953996e-06, "epoch": 17.66891891891892, "percentage": 88.34, "elapsed_time": "1:21:49", "remaining_time": "0:10:47", "throughput": 1319.99, "total_tokens": 6481000}
|
| 6817 |
+
{"current_steps": 34000, "total_steps": 38480, "loss": 0.3068, "lr": 2.0371185099918756e-06, "epoch": 17.671517671517673, "percentage": 88.36, "elapsed_time": "1:21:50", "remaining_time": "0:10:47", "throughput": 1320.0, "total_tokens": 6481992}
|
| 6818 |
+
{"current_steps": 34005, "total_steps": 38480, "loss": 0.0763, "lr": 2.0326375176002777e-06, "epoch": 17.674116424116423, "percentage": 88.37, "elapsed_time": "1:21:51", "remaining_time": "0:10:46", "throughput": 1320.02, "total_tokens": 6482984}
|
| 6819 |
+
{"current_steps": 34010, "total_steps": 38480, "loss": 0.2626, "lr": 2.0281612501424546e-06, "epoch": 17.676715176715177, "percentage": 88.38, "elapsed_time": "1:21:51", "remaining_time": "0:10:45", "throughput": 1320.02, "total_tokens": 6483912}
|
| 6820 |
+
{"current_steps": 34015, "total_steps": 38480, "loss": 0.0888, "lr": 2.023689708539278e-06, "epoch": 17.679313929313928, "percentage": 88.4, "elapsed_time": "1:21:52", "remaining_time": "0:10:44", "throughput": 1320.01, "total_tokens": 6484776}
|
| 6821 |
+
{"current_steps": 34020, "total_steps": 38480, "loss": 0.1527, "lr": 2.019222893710657e-06, "epoch": 17.68191268191268, "percentage": 88.41, "elapsed_time": "1:21:53", "remaining_time": "0:10:44", "throughput": 1320.01, "total_tokens": 6485704}
|
| 6822 |
+
{"current_steps": 34025, "total_steps": 38480, "loss": 0.2162, "lr": 2.014760806575516e-06, "epoch": 17.684511434511435, "percentage": 88.42, "elapsed_time": "1:21:54", "remaining_time": "0:10:43", "throughput": 1320.02, "total_tokens": 6486664}
|
| 6823 |
+
{"current_steps": 34030, "total_steps": 38480, "loss": 0.1425, "lr": 2.010303448051809e-06, "epoch": 17.687110187110186, "percentage": 88.44, "elapsed_time": "1:21:54", "remaining_time": "0:10:42", "throughput": 1320.03, "total_tokens": 6487624}
|
| 6824 |
+
{"current_steps": 34035, "total_steps": 38480, "loss": 0.2093, "lr": 2.0058508190565315e-06, "epoch": 17.68970893970894, "percentage": 88.45, "elapsed_time": "1:21:55", "remaining_time": "0:10:41", "throughput": 1320.03, "total_tokens": 6488584}
|
| 6825 |
+
{"current_steps": 34040, "total_steps": 38480, "loss": 0.0616, "lr": 2.0014029205056935e-06, "epoch": 17.692307692307693, "percentage": 88.46, "elapsed_time": "1:21:56", "remaining_time": "0:10:41", "throughput": 1320.04, "total_tokens": 6489544}
|
| 6826 |
+
{"current_steps": 34045, "total_steps": 38480, "loss": 0.0407, "lr": 1.9969597533143285e-06, "epoch": 17.694906444906444, "percentage": 88.47, "elapsed_time": "1:21:56", "remaining_time": "0:10:40", "throughput": 1320.05, "total_tokens": 6490504}
|
| 6827 |
+
{"current_steps": 34050, "total_steps": 38480, "loss": 0.1402, "lr": 1.992521318396498e-06, "epoch": 17.697505197505198, "percentage": 88.49, "elapsed_time": "1:21:57", "remaining_time": "0:10:39", "throughput": 1320.06, "total_tokens": 6491464}
|
| 6828 |
+
{"current_steps": 34055, "total_steps": 38480, "loss": 0.0331, "lr": 1.988087616665307e-06, "epoch": 17.70010395010395, "percentage": 88.5, "elapsed_time": "1:21:58", "remaining_time": "0:10:39", "throughput": 1320.06, "total_tokens": 6492392}
|
| 6829 |
+
{"current_steps": 34060, "total_steps": 38480, "loss": 0.0281, "lr": 1.9836586490328627e-06, "epoch": 17.7027027027027, "percentage": 88.51, "elapsed_time": "1:21:58", "remaining_time": "0:10:38", "throughput": 1320.05, "total_tokens": 6493256}
|
| 6830 |
+
{"current_steps": 34065, "total_steps": 38480, "loss": 0.2953, "lr": 1.9792344164103167e-06, "epoch": 17.705301455301456, "percentage": 88.53, "elapsed_time": "1:21:59", "remaining_time": "0:10:37", "throughput": 1320.06, "total_tokens": 6494248}
|
| 6831 |
+
{"current_steps": 34070, "total_steps": 38480, "loss": 0.1263, "lr": 1.974814919707829e-06, "epoch": 17.70790020790021, "percentage": 88.54, "elapsed_time": "1:22:00", "remaining_time": "0:10:36", "throughput": 1320.05, "total_tokens": 6495112}
|
| 6832 |
+
{"current_steps": 34075, "total_steps": 38480, "loss": 0.2709, "lr": 1.9704001598346066e-06, "epoch": 17.71049896049896, "percentage": 88.55, "elapsed_time": "1:22:01", "remaining_time": "0:10:36", "throughput": 1320.05, "total_tokens": 6496040}
|
| 6833 |
+
{"current_steps": 34080, "total_steps": 38480, "loss": 0.1396, "lr": 1.965990137698867e-06, "epoch": 17.713097713097714, "percentage": 88.57, "elapsed_time": "1:22:01", "remaining_time": "0:10:35", "throughput": 1320.05, "total_tokens": 6496936}
|
| 6834 |
+
{"current_steps": 34085, "total_steps": 38480, "loss": 0.1008, "lr": 1.9615848542078567e-06, "epoch": 17.715696465696467, "percentage": 88.58, "elapsed_time": "1:22:02", "remaining_time": "0:10:34", "throughput": 1320.04, "total_tokens": 6497832}
|
| 6835 |
+
{"current_steps": 34090, "total_steps": 38480, "loss": 0.2372, "lr": 1.9571843102678455e-06, "epoch": 17.718295218295218, "percentage": 88.59, "elapsed_time": "1:22:03", "remaining_time": "0:10:33", "throughput": 1320.08, "total_tokens": 6498920}
|
| 6836 |
+
{"current_steps": 34095, "total_steps": 38480, "loss": 0.0805, "lr": 1.9527885067841282e-06, "epoch": 17.72089397089397, "percentage": 88.6, "elapsed_time": "1:22:03", "remaining_time": "0:10:33", "throughput": 1320.06, "total_tokens": 6499752}
|
| 6837 |
+
{"current_steps": 34100, "total_steps": 38480, "loss": 0.0893, "lr": 1.948397444661029e-06, "epoch": 17.723492723492722, "percentage": 88.62, "elapsed_time": "1:22:04", "remaining_time": "0:10:32", "throughput": 1320.07, "total_tokens": 6500712}
|
| 6838 |
+
{"current_steps": 34105, "total_steps": 38480, "loss": 0.1765, "lr": 1.9440111248018976e-06, "epoch": 17.726091476091476, "percentage": 88.63, "elapsed_time": "1:22:05", "remaining_time": "0:10:31", "throughput": 1320.07, "total_tokens": 6501672}
|
| 6839 |
+
{"current_steps": 34110, "total_steps": 38480, "loss": 0.2215, "lr": 1.939629548109101e-06, "epoch": 17.72869022869023, "percentage": 88.64, "elapsed_time": "1:22:05", "remaining_time": "0:10:31", "throughput": 1320.09, "total_tokens": 6502664}
|
| 6840 |
+
{"current_steps": 34115, "total_steps": 38480, "loss": 0.1151, "lr": 1.9352527154840345e-06, "epoch": 17.73128898128898, "percentage": 88.66, "elapsed_time": "1:22:06", "remaining_time": "0:10:30", "throughput": 1320.1, "total_tokens": 6503624}
|
| 6841 |
+
{"current_steps": 34120, "total_steps": 38480, "loss": 0.0929, "lr": 1.9308806278271143e-06, "epoch": 17.733887733887734, "percentage": 88.67, "elapsed_time": "1:22:07", "remaining_time": "0:10:29", "throughput": 1320.09, "total_tokens": 6504520}
|
| 6842 |
+
{"current_steps": 34125, "total_steps": 38480, "loss": 0.0731, "lr": 1.9265132860377872e-06, "epoch": 17.736486486486488, "percentage": 88.68, "elapsed_time": "1:22:08", "remaining_time": "0:10:28", "throughput": 1320.1, "total_tokens": 6505512}
|
| 6843 |
+
{"current_steps": 34130, "total_steps": 38480, "loss": 0.1284, "lr": 1.922150691014518e-06, "epoch": 17.739085239085238, "percentage": 88.7, "elapsed_time": "1:22:08", "remaining_time": "0:10:28", "throughput": 1320.11, "total_tokens": 6506472}
|
| 6844 |
+
{"current_steps": 34135, "total_steps": 38480, "loss": 0.1393, "lr": 1.9177928436547946e-06, "epoch": 17.741683991683992, "percentage": 88.71, "elapsed_time": "1:22:09", "remaining_time": "0:10:27", "throughput": 1320.11, "total_tokens": 6507400}
|
| 6845 |
+
{"current_steps": 34140, "total_steps": 38480, "loss": 0.1167, "lr": 1.9134397448551305e-06, "epoch": 17.744282744282746, "percentage": 88.72, "elapsed_time": "1:22:10", "remaining_time": "0:10:26", "throughput": 1320.09, "total_tokens": 6508232}
|
| 6846 |
+
{"current_steps": 34145, "total_steps": 38480, "loss": 0.0848, "lr": 1.9090913955110644e-06, "epoch": 17.746881496881496, "percentage": 88.73, "elapsed_time": "1:22:10", "remaining_time": "0:10:26", "throughput": 1320.1, "total_tokens": 6509192}
|
| 6847 |
+
{"current_steps": 34150, "total_steps": 38480, "loss": 0.1169, "lr": 1.9047477965171618e-06, "epoch": 17.74948024948025, "percentage": 88.75, "elapsed_time": "1:22:11", "remaining_time": "0:10:25", "throughput": 1320.1, "total_tokens": 6510120}
|
| 6848 |
+
{"current_steps": 34155, "total_steps": 38480, "loss": 0.1444, "lr": 1.9004089487669853e-06, "epoch": 17.752079002079, "percentage": 88.76, "elapsed_time": "1:22:12", "remaining_time": "0:10:24", "throughput": 1320.1, "total_tokens": 6511048}
|
| 6849 |
+
{"current_steps": 34160, "total_steps": 38480, "loss": 0.0922, "lr": 1.8960748531531542e-06, "epoch": 17.754677754677754, "percentage": 88.77, "elapsed_time": "1:22:12", "remaining_time": "0:10:23", "throughput": 1320.12, "total_tokens": 6512040}
|
| 6850 |
+
{"current_steps": 34165, "total_steps": 38480, "loss": 0.2737, "lr": 1.8917455105672943e-06, "epoch": 17.757276507276508, "percentage": 88.79, "elapsed_time": "1:22:13", "remaining_time": "0:10:23", "throughput": 1320.13, "total_tokens": 6513032}
|
| 6851 |
+
{"current_steps": 34170, "total_steps": 38480, "loss": 0.1289, "lr": 1.8874209219000488e-06, "epoch": 17.75987525987526, "percentage": 88.8, "elapsed_time": "1:22:14", "remaining_time": "0:10:22", "throughput": 1320.13, "total_tokens": 6513928}
|
| 6852 |
+
{"current_steps": 34175, "total_steps": 38480, "loss": 0.1116, "lr": 1.8831010880410893e-06, "epoch": 17.762474012474012, "percentage": 88.81, "elapsed_time": "1:22:15", "remaining_time": "0:10:21", "throughput": 1320.14, "total_tokens": 6514920}
|
| 6853 |
+
{"current_steps": 34180, "total_steps": 38480, "loss": 0.0794, "lr": 1.878786009879116e-06, "epoch": 17.765072765072766, "percentage": 88.83, "elapsed_time": "1:22:15", "remaining_time": "0:10:20", "throughput": 1320.14, "total_tokens": 6515816}
|
| 6854 |
+
{"current_steps": 34185, "total_steps": 38480, "loss": 0.1542, "lr": 1.8744756883018354e-06, "epoch": 17.767671517671516, "percentage": 88.84, "elapsed_time": "1:22:16", "remaining_time": "0:10:20", "throughput": 1320.16, "total_tokens": 6516840}
|
| 6855 |
+
{"current_steps": 34190, "total_steps": 38480, "loss": 0.1549, "lr": 1.8701701241959885e-06, "epoch": 17.77027027027027, "percentage": 88.85, "elapsed_time": "1:22:17", "remaining_time": "0:10:19", "throughput": 1320.16, "total_tokens": 6517800}
|
| 6856 |
+
{"current_steps": 34195, "total_steps": 38480, "loss": 0.1156, "lr": 1.8658693184473275e-06, "epoch": 17.772869022869024, "percentage": 88.86, "elapsed_time": "1:22:17", "remaining_time": "0:10:18", "throughput": 1320.17, "total_tokens": 6518728}
|
| 6857 |
+
{"current_steps": 34200, "total_steps": 38480, "loss": 0.1117, "lr": 1.8615732719406337e-06, "epoch": 17.775467775467774, "percentage": 88.88, "elapsed_time": "1:22:18", "remaining_time": "0:10:18", "throughput": 1320.17, "total_tokens": 6519688}
|
| 6858 |
+
{"current_steps": 34205, "total_steps": 38480, "loss": 0.1157, "lr": 1.8572819855597085e-06, "epoch": 17.778066528066528, "percentage": 88.89, "elapsed_time": "1:22:19", "remaining_time": "0:10:17", "throughput": 1320.17, "total_tokens": 6520616}
|
| 6859 |
+
{"current_steps": 34210, "total_steps": 38480, "loss": 0.1451, "lr": 1.852995460187365e-06, "epoch": 17.780665280665282, "percentage": 88.9, "elapsed_time": "1:22:19", "remaining_time": "0:10:16", "throughput": 1320.19, "total_tokens": 6521640}
|
| 6860 |
+
{"current_steps": 34215, "total_steps": 38480, "loss": 0.1224, "lr": 1.8487136967054564e-06, "epoch": 17.783264033264032, "percentage": 88.92, "elapsed_time": "1:22:20", "remaining_time": "0:10:15", "throughput": 1320.19, "total_tokens": 6522536}
|
| 6861 |
+
{"current_steps": 34220, "total_steps": 38480, "loss": 0.0898, "lr": 1.8444366959948278e-06, "epoch": 17.785862785862786, "percentage": 88.93, "elapsed_time": "1:22:21", "remaining_time": "0:10:15", "throughput": 1320.19, "total_tokens": 6523464}
|
| 6862 |
+
{"current_steps": 34225, "total_steps": 38480, "loss": 0.109, "lr": 1.8401644589353673e-06, "epoch": 17.78846153846154, "percentage": 88.94, "elapsed_time": "1:22:22", "remaining_time": "0:10:14", "throughput": 1320.19, "total_tokens": 6524392}
|
| 6863 |
+
{"current_steps": 34230, "total_steps": 38480, "loss": 0.1149, "lr": 1.8358969864059771e-06, "epoch": 17.79106029106029, "percentage": 88.96, "elapsed_time": "1:22:22", "remaining_time": "0:10:13", "throughput": 1320.2, "total_tokens": 6525352}
|
| 6864 |
+
{"current_steps": 34235, "total_steps": 38480, "loss": 0.1256, "lr": 1.8316342792845776e-06, "epoch": 17.793659043659044, "percentage": 88.97, "elapsed_time": "1:22:23", "remaining_time": "0:10:12", "throughput": 1320.18, "total_tokens": 6526216}
|
| 6865 |
+
{"current_steps": 34240, "total_steps": 38480, "loss": 0.1345, "lr": 1.8273763384481086e-06, "epoch": 17.796257796257795, "percentage": 88.98, "elapsed_time": "1:22:24", "remaining_time": "0:10:12", "throughput": 1320.19, "total_tokens": 6527176}
|
| 6866 |
+
{"current_steps": 34245, "total_steps": 38480, "loss": 0.1008, "lr": 1.8231231647725221e-06, "epoch": 17.79885654885655, "percentage": 88.99, "elapsed_time": "1:22:24", "remaining_time": "0:10:11", "throughput": 1320.2, "total_tokens": 6528136}
|
| 6867 |
+
{"current_steps": 34250, "total_steps": 38480, "loss": 0.1185, "lr": 1.8188747591328132e-06, "epoch": 17.801455301455302, "percentage": 89.01, "elapsed_time": "1:22:25", "remaining_time": "0:10:10", "throughput": 1320.21, "total_tokens": 6529128}
|
| 6868 |
+
{"current_steps": 34255, "total_steps": 38480, "loss": 0.0573, "lr": 1.814631122402971e-06, "epoch": 17.804054054054053, "percentage": 89.02, "elapsed_time": "1:22:26", "remaining_time": "0:10:10", "throughput": 1320.21, "total_tokens": 6530056}
|
| 6869 |
+
{"current_steps": 34260, "total_steps": 38480, "loss": 0.0951, "lr": 1.8103922554560114e-06, "epoch": 17.806652806652806, "percentage": 89.03, "elapsed_time": "1:22:26", "remaining_time": "0:10:09", "throughput": 1320.21, "total_tokens": 6530984}
|
| 6870 |
+
{"current_steps": 34265, "total_steps": 38480, "loss": 0.0446, "lr": 1.8061581591639705e-06, "epoch": 17.80925155925156, "percentage": 89.05, "elapsed_time": "1:22:27", "remaining_time": "0:10:08", "throughput": 1320.23, "total_tokens": 6531976}
|
| 6871 |
+
{"current_steps": 34270, "total_steps": 38480, "loss": 0.0821, "lr": 1.801928834397909e-06, "epoch": 17.81185031185031, "percentage": 89.06, "elapsed_time": "1:22:28", "remaining_time": "0:10:07", "throughput": 1320.24, "total_tokens": 6532968}
|
| 6872 |
+
{"current_steps": 34275, "total_steps": 38480, "loss": 0.1564, "lr": 1.7977042820278956e-06, "epoch": 17.814449064449065, "percentage": 89.07, "elapsed_time": "1:22:29", "remaining_time": "0:10:07", "throughput": 1320.25, "total_tokens": 6533928}
|
| 6873 |
+
{"current_steps": 34280, "total_steps": 38480, "loss": 0.1296, "lr": 1.7934845029230207e-06, "epoch": 17.81704781704782, "percentage": 89.09, "elapsed_time": "1:22:29", "remaining_time": "0:10:06", "throughput": 1320.26, "total_tokens": 6534920}
|
| 6874 |
+
{"current_steps": 34285, "total_steps": 38480, "loss": 0.2616, "lr": 1.789269497951393e-06, "epoch": 17.81964656964657, "percentage": 89.1, "elapsed_time": "1:22:30", "remaining_time": "0:10:05", "throughput": 1320.27, "total_tokens": 6535912}
|
| 6875 |
+
{"current_steps": 34290, "total_steps": 38480, "loss": 0.2627, "lr": 1.7850592679801438e-06, "epoch": 17.822245322245323, "percentage": 89.11, "elapsed_time": "1:22:31", "remaining_time": "0:10:04", "throughput": 1320.29, "total_tokens": 6536904}
|
| 6876 |
+
{"current_steps": 34295, "total_steps": 38480, "loss": 0.0938, "lr": 1.7808538138754132e-06, "epoch": 17.824844074844076, "percentage": 89.12, "elapsed_time": "1:22:31", "remaining_time": "0:10:04", "throughput": 1320.29, "total_tokens": 6537864}
|
| 6877 |
+
{"current_steps": 34300, "total_steps": 38480, "loss": 0.0564, "lr": 1.7766531365023597e-06, "epoch": 17.827442827442827, "percentage": 89.14, "elapsed_time": "1:22:32", "remaining_time": "0:10:03", "throughput": 1320.31, "total_tokens": 6538888}
|
| 6878 |
+
{"current_steps": 34305, "total_steps": 38480, "loss": 0.0919, "lr": 1.7724572367251723e-06, "epoch": 17.83004158004158, "percentage": 89.15, "elapsed_time": "1:22:33", "remaining_time": "0:10:02", "throughput": 1320.3, "total_tokens": 6539752}
|
| 6879 |
+
{"current_steps": 34310, "total_steps": 38480, "loss": 0.0942, "lr": 1.7682661154070411e-06, "epoch": 17.83264033264033, "percentage": 89.16, "elapsed_time": "1:22:33", "remaining_time": "0:10:02", "throughput": 1320.32, "total_tokens": 6540744}
|
| 6880 |
+
{"current_steps": 34315, "total_steps": 38480, "loss": 0.1835, "lr": 1.764079773410185e-06, "epoch": 17.835239085239085, "percentage": 89.18, "elapsed_time": "1:22:34", "remaining_time": "0:10:01", "throughput": 1320.31, "total_tokens": 6541640}
|
| 6881 |
+
{"current_steps": 34320, "total_steps": 38480, "loss": 0.19, "lr": 1.759898211595823e-06, "epoch": 17.83783783783784, "percentage": 89.19, "elapsed_time": "1:22:35", "remaining_time": "0:10:00", "throughput": 1320.3, "total_tokens": 6542536}
|
| 6882 |
+
{"current_steps": 34325, "total_steps": 38480, "loss": 0.2351, "lr": 1.755721430824217e-06, "epoch": 17.84043659043659, "percentage": 89.2, "elapsed_time": "1:22:36", "remaining_time": "0:09:59", "throughput": 1320.29, "total_tokens": 6543400}
|
| 6883 |
+
{"current_steps": 34330, "total_steps": 38480, "loss": 0.2396, "lr": 1.7515494319546215e-06, "epoch": 17.843035343035343, "percentage": 89.22, "elapsed_time": "1:22:36", "remaining_time": "0:09:59", "throughput": 1320.3, "total_tokens": 6544360}
|
| 6884 |
+
{"current_steps": 34335, "total_steps": 38480, "loss": 0.0691, "lr": 1.747382215845314e-06, "epoch": 17.845634095634097, "percentage": 89.23, "elapsed_time": "1:22:37", "remaining_time": "0:09:58", "throughput": 1320.31, "total_tokens": 6545352}
|
| 6885 |
+
{"current_steps": 34340, "total_steps": 38480, "loss": 0.2042, "lr": 1.7432197833535975e-06, "epoch": 17.848232848232847, "percentage": 89.24, "elapsed_time": "1:22:38", "remaining_time": "0:09:57", "throughput": 1320.31, "total_tokens": 6546280}
|
| 6886 |
+
{"current_steps": 34345, "total_steps": 38480, "loss": 0.1465, "lr": 1.739062135335784e-06, "epoch": 17.8508316008316, "percentage": 89.25, "elapsed_time": "1:22:38", "remaining_time": "0:09:57", "throughput": 1320.32, "total_tokens": 6547208}
|
| 6887 |
+
{"current_steps": 34350, "total_steps": 38480, "loss": 0.1039, "lr": 1.7349092726471894e-06, "epoch": 17.853430353430355, "percentage": 89.27, "elapsed_time": "1:22:39", "remaining_time": "0:09:56", "throughput": 1320.31, "total_tokens": 6548104}
|
| 6888 |
+
{"current_steps": 34355, "total_steps": 38480, "loss": 0.1177, "lr": 1.7307611961421665e-06, "epoch": 17.856029106029105, "percentage": 89.28, "elapsed_time": "1:22:40", "remaining_time": "0:09:55", "throughput": 1320.32, "total_tokens": 6549064}
|
| 6889 |
+
{"current_steps": 34360, "total_steps": 38480, "loss": 0.0634, "lr": 1.7266179066740718e-06, "epoch": 17.85862785862786, "percentage": 89.29, "elapsed_time": "1:22:40", "remaining_time": "0:09:54", "throughput": 1320.32, "total_tokens": 6549992}
|
| 6890 |
+
{"current_steps": 34365, "total_steps": 38480, "loss": 0.1812, "lr": 1.7224794050952787e-06, "epoch": 17.861226611226613, "percentage": 89.31, "elapsed_time": "1:22:41", "remaining_time": "0:09:54", "throughput": 1320.32, "total_tokens": 6550920}
|
| 6891 |
+
{"current_steps": 34370, "total_steps": 38480, "loss": 0.0905, "lr": 1.71834569225717e-06, "epoch": 17.863825363825363, "percentage": 89.32, "elapsed_time": "1:22:42", "remaining_time": "0:09:53", "throughput": 1320.32, "total_tokens": 6551848}
|
| 6892 |
+
{"current_steps": 34375, "total_steps": 38480, "loss": 0.1308, "lr": 1.7142167690101574e-06, "epoch": 17.866424116424117, "percentage": 89.33, "elapsed_time": "1:22:43", "remaining_time": "0:09:52", "throughput": 1320.34, "total_tokens": 6552840}
|
| 6893 |
+
{"current_steps": 34380, "total_steps": 38480, "loss": 0.1477, "lr": 1.7100926362036529e-06, "epoch": 17.86902286902287, "percentage": 89.35, "elapsed_time": "1:22:43", "remaining_time": "0:09:51", "throughput": 1320.35, "total_tokens": 6553800}
|
| 6894 |
+
{"current_steps": 34385, "total_steps": 38480, "loss": 0.1926, "lr": 1.7059732946860945e-06, "epoch": 17.87162162162162, "percentage": 89.36, "elapsed_time": "1:22:44", "remaining_time": "0:09:51", "throughput": 1320.37, "total_tokens": 6554824}
|
| 6895 |
+
{"current_steps": 34390, "total_steps": 38480, "loss": 0.1881, "lr": 1.7018587453049184e-06, "epoch": 17.874220374220375, "percentage": 89.37, "elapsed_time": "1:22:45", "remaining_time": "0:09:50", "throughput": 1320.37, "total_tokens": 6555752}
|
| 6896 |
+
{"current_steps": 34395, "total_steps": 38480, "loss": 0.1135, "lr": 1.6977489889065973e-06, "epoch": 17.876819126819125, "percentage": 89.38, "elapsed_time": "1:22:45", "remaining_time": "0:09:49", "throughput": 1320.37, "total_tokens": 6556712}
|
| 6897 |
+
{"current_steps": 34400, "total_steps": 38480, "loss": 0.1031, "lr": 1.6936440263366022e-06, "epoch": 17.87941787941788, "percentage": 89.4, "elapsed_time": "1:22:46", "remaining_time": "0:09:49", "throughput": 1320.37, "total_tokens": 6557640}
|
| 6898 |
+
{"current_steps": 34405, "total_steps": 38480, "loss": 0.1593, "lr": 1.6895438584394186e-06, "epoch": 17.882016632016633, "percentage": 89.41, "elapsed_time": "1:22:47", "remaining_time": "0:09:48", "throughput": 1320.39, "total_tokens": 6558632}
|
| 6899 |
+
{"current_steps": 34410, "total_steps": 38480, "loss": 0.1056, "lr": 1.6854484860585523e-06, "epoch": 17.884615384615383, "percentage": 89.42, "elapsed_time": "1:22:47", "remaining_time": "0:09:47", "throughput": 1320.42, "total_tokens": 6559720}
|
| 6900 |
+
{"current_steps": 34415, "total_steps": 38480, "loss": 0.2466, "lr": 1.6813579100365185e-06, "epoch": 17.887214137214137, "percentage": 89.44, "elapsed_time": "1:22:48", "remaining_time": "0:09:46", "throughput": 1320.44, "total_tokens": 6560712}
|
| 6901 |
+
{"current_steps": 34420, "total_steps": 38480, "loss": 0.146, "lr": 1.6772721312148438e-06, "epoch": 17.88981288981289, "percentage": 89.45, "elapsed_time": "1:22:49", "remaining_time": "0:09:46", "throughput": 1320.45, "total_tokens": 6561672}
|
| 6902 |
+
{"current_steps": 34425, "total_steps": 38480, "loss": 0.1839, "lr": 1.673191150434067e-06, "epoch": 17.89241164241164, "percentage": 89.46, "elapsed_time": "1:22:49", "remaining_time": "0:09:45", "throughput": 1320.45, "total_tokens": 6562600}
|
| 6903 |
+
{"current_steps": 34430, "total_steps": 38480, "loss": 0.1684, "lr": 1.6691149685337526e-06, "epoch": 17.895010395010395, "percentage": 89.48, "elapsed_time": "1:22:50", "remaining_time": "0:09:44", "throughput": 1320.44, "total_tokens": 6563464}
|
| 6904 |
+
{"current_steps": 34435, "total_steps": 38480, "loss": 0.1044, "lr": 1.6650435863524632e-06, "epoch": 17.89760914760915, "percentage": 89.49, "elapsed_time": "1:22:51", "remaining_time": "0:09:43", "throughput": 1320.44, "total_tokens": 6564392}
|
| 6905 |
+
{"current_steps": 34440, "total_steps": 38480, "loss": 0.0766, "lr": 1.660977004727779e-06, "epoch": 17.9002079002079, "percentage": 89.5, "elapsed_time": "1:22:52", "remaining_time": "0:09:43", "throughput": 1320.44, "total_tokens": 6565320}
|
| 6906 |
+
{"current_steps": 34445, "total_steps": 38480, "loss": 0.1422, "lr": 1.6569152244962888e-06, "epoch": 17.902806652806653, "percentage": 89.51, "elapsed_time": "1:22:52", "remaining_time": "0:09:42", "throughput": 1320.45, "total_tokens": 6566312}
|
| 6907 |
+
{"current_steps": 34450, "total_steps": 38480, "loss": 0.0345, "lr": 1.6528582464936076e-06, "epoch": 17.905405405405407, "percentage": 89.53, "elapsed_time": "1:22:53", "remaining_time": "0:09:41", "throughput": 1320.45, "total_tokens": 6567208}
|
| 6908 |
+
{"current_steps": 34455, "total_steps": 38480, "loss": 0.1358, "lr": 1.6488060715543457e-06, "epoch": 17.908004158004157, "percentage": 89.54, "elapsed_time": "1:22:54", "remaining_time": "0:09:41", "throughput": 1320.46, "total_tokens": 6568168}
|
| 6909 |
+
{"current_steps": 34460, "total_steps": 38480, "loss": 0.0801, "lr": 1.6447587005121307e-06, "epoch": 17.91060291060291, "percentage": 89.55, "elapsed_time": "1:22:54", "remaining_time": "0:09:40", "throughput": 1320.45, "total_tokens": 6569064}
|
| 6910 |
+
{"current_steps": 34465, "total_steps": 38480, "loss": 0.0701, "lr": 1.6407161341996074e-06, "epoch": 17.91320166320166, "percentage": 89.57, "elapsed_time": "1:22:55", "remaining_time": "0:09:39", "throughput": 1320.47, "total_tokens": 6570088}
|
| 6911 |
+
{"current_steps": 34470, "total_steps": 38480, "loss": 0.0948, "lr": 1.6366783734484276e-06, "epoch": 17.915800415800415, "percentage": 89.58, "elapsed_time": "1:22:56", "remaining_time": "0:09:38", "throughput": 1320.46, "total_tokens": 6570952}
|
| 6912 |
+
{"current_steps": 34475, "total_steps": 38480, "loss": 0.0987, "lr": 1.6326454190892516e-06, "epoch": 17.91839916839917, "percentage": 89.59, "elapsed_time": "1:22:56", "remaining_time": "0:09:38", "throughput": 1320.46, "total_tokens": 6571848}
|
| 6913 |
+
{"current_steps": 34480, "total_steps": 38480, "loss": 0.2215, "lr": 1.6286172719517601e-06, "epoch": 17.92099792099792, "percentage": 89.6, "elapsed_time": "1:22:57", "remaining_time": "0:09:37", "throughput": 1320.47, "total_tokens": 6572808}
|
| 6914 |
+
{"current_steps": 34485, "total_steps": 38480, "loss": 0.0714, "lr": 1.624593932864632e-06, "epoch": 17.923596673596673, "percentage": 89.62, "elapsed_time": "1:22:58", "remaining_time": "0:09:36", "throughput": 1320.47, "total_tokens": 6573736}
|
| 6915 |
+
{"current_steps": 34490, "total_steps": 38480, "loss": 0.155, "lr": 1.6205754026555664e-06, "epoch": 17.926195426195427, "percentage": 89.63, "elapsed_time": "1:22:59", "remaining_time": "0:09:36", "throughput": 1320.46, "total_tokens": 6574600}
|
| 6916 |
+
{"current_steps": 34495, "total_steps": 38480, "loss": 0.0505, "lr": 1.6165616821512714e-06, "epoch": 17.928794178794178, "percentage": 89.64, "elapsed_time": "1:22:59", "remaining_time": "0:09:35", "throughput": 1320.47, "total_tokens": 6575560}
|
| 6917 |
+
{"current_steps": 34500, "total_steps": 38480, "loss": 0.128, "lr": 1.612552772177467e-06, "epoch": 17.93139293139293, "percentage": 89.66, "elapsed_time": "1:23:00", "remaining_time": "0:09:34", "throughput": 1320.48, "total_tokens": 6576552}
|
| 6918 |
+
{"current_steps": 34505, "total_steps": 38480, "loss": 0.0544, "lr": 1.6085486735588768e-06, "epoch": 17.933991683991685, "percentage": 89.67, "elapsed_time": "1:23:01", "remaining_time": "0:09:33", "throughput": 1320.49, "total_tokens": 6577512}
|
| 6919 |
+
{"current_steps": 34510, "total_steps": 38480, "loss": 0.1165, "lr": 1.6045493871192446e-06, "epoch": 17.936590436590436, "percentage": 89.68, "elapsed_time": "1:23:01", "remaining_time": "0:09:33", "throughput": 1320.5, "total_tokens": 6578504}
|
| 6920 |
+
{"current_steps": 34515, "total_steps": 38480, "loss": 0.1762, "lr": 1.6005549136813124e-06, "epoch": 17.93918918918919, "percentage": 89.7, "elapsed_time": "1:23:02", "remaining_time": "0:09:32", "throughput": 1320.51, "total_tokens": 6579464}
|
| 6921 |
+
{"current_steps": 34520, "total_steps": 38480, "loss": 0.0637, "lr": 1.5965652540668479e-06, "epoch": 17.941787941787943, "percentage": 89.71, "elapsed_time": "1:23:03", "remaining_time": "0:09:31", "throughput": 1320.51, "total_tokens": 6580424}
|
| 6922 |
+
{"current_steps": 34525, "total_steps": 38480, "loss": 0.2097, "lr": 1.5925804090966113e-06, "epoch": 17.944386694386694, "percentage": 89.72, "elapsed_time": "1:23:03", "remaining_time": "0:09:30", "throughput": 1320.53, "total_tokens": 6581416}
|
| 6923 |
+
{"current_steps": 34530, "total_steps": 38480, "loss": 0.107, "lr": 1.5886003795903853e-06, "epoch": 17.946985446985448, "percentage": 89.73, "elapsed_time": "1:23:04", "remaining_time": "0:09:30", "throughput": 1320.55, "total_tokens": 6582440}
|
| 6924 |
+
{"current_steps": 34535, "total_steps": 38480, "loss": 0.1273, "lr": 1.5846251663669487e-06, "epoch": 17.9495841995842, "percentage": 89.75, "elapsed_time": "1:23:05", "remaining_time": "0:09:29", "throughput": 1320.57, "total_tokens": 6583464}
|
| 6925 |
+
{"current_steps": 34540, "total_steps": 38480, "loss": 0.0943, "lr": 1.5806547702441083e-06, "epoch": 17.95218295218295, "percentage": 89.76, "elapsed_time": "1:23:06", "remaining_time": "0:09:28", "throughput": 1320.56, "total_tokens": 6584328}
|
| 6926 |
+
{"current_steps": 34545, "total_steps": 38480, "loss": 0.0972, "lr": 1.5766891920386633e-06, "epoch": 17.954781704781706, "percentage": 89.77, "elapsed_time": "1:23:06", "remaining_time": "0:09:28", "throughput": 1320.56, "total_tokens": 6585256}
|
| 6927 |
+
{"current_steps": 34550, "total_steps": 38480, "loss": 0.1851, "lr": 1.5727284325664253e-06, "epoch": 17.957380457380456, "percentage": 89.79, "elapsed_time": "1:23:07", "remaining_time": "0:09:27", "throughput": 1320.58, "total_tokens": 6586280}
|
| 6928 |
+
{"current_steps": 34555, "total_steps": 38480, "loss": 0.0812, "lr": 1.5687724926422205e-06, "epoch": 17.95997920997921, "percentage": 89.8, "elapsed_time": "1:23:08", "remaining_time": "0:09:26", "throughput": 1320.59, "total_tokens": 6587272}
|
| 6929 |
+
{"current_steps": 34560, "total_steps": 38480, "loss": 0.0707, "lr": 1.5648213730798805e-06, "epoch": 17.962577962577964, "percentage": 89.81, "elapsed_time": "1:23:08", "remaining_time": "0:09:25", "throughput": 1320.6, "total_tokens": 6588232}
|
| 6930 |
+
{"current_steps": 34565, "total_steps": 38480, "loss": 0.0815, "lr": 1.5608750746922446e-06, "epoch": 17.965176715176714, "percentage": 89.83, "elapsed_time": "1:23:09", "remaining_time": "0:09:25", "throughput": 1320.62, "total_tokens": 6589224}
|
| 6931 |
+
{"current_steps": 34570, "total_steps": 38480, "loss": 0.095, "lr": 1.5569335982911549e-06, "epoch": 17.967775467775468, "percentage": 89.84, "elapsed_time": "1:23:10", "remaining_time": "0:09:24", "throughput": 1320.63, "total_tokens": 6590216}
|
| 6932 |
+
{"current_steps": 34575, "total_steps": 38480, "loss": 0.1634, "lr": 1.5529969446874764e-06, "epoch": 17.97037422037422, "percentage": 89.85, "elapsed_time": "1:23:10", "remaining_time": "0:09:23", "throughput": 1320.62, "total_tokens": 6591112}
|
| 6933 |
+
{"current_steps": 34580, "total_steps": 38480, "loss": 0.0602, "lr": 1.5490651146910673e-06, "epoch": 17.972972972972972, "percentage": 89.86, "elapsed_time": "1:23:11", "remaining_time": "0:09:22", "throughput": 1320.64, "total_tokens": 6592104}
|
| 6934 |
+
{"current_steps": 34585, "total_steps": 38480, "loss": 0.1213, "lr": 1.5451381091107941e-06, "epoch": 17.975571725571726, "percentage": 89.88, "elapsed_time": "1:23:12", "remaining_time": "0:09:22", "throughput": 1320.65, "total_tokens": 6593096}
|
| 6935 |
+
{"current_steps": 34590, "total_steps": 38480, "loss": 0.1078, "lr": 1.5412159287545468e-06, "epoch": 17.97817047817048, "percentage": 89.89, "elapsed_time": "1:23:12", "remaining_time": "0:09:21", "throughput": 1320.67, "total_tokens": 6594120}
|
| 6936 |
+
{"current_steps": 34595, "total_steps": 38480, "loss": 0.1878, "lr": 1.5372985744292024e-06, "epoch": 17.98076923076923, "percentage": 89.9, "elapsed_time": "1:23:13", "remaining_time": "0:09:20", "throughput": 1320.68, "total_tokens": 6595048}
|
| 6937 |
+
{"current_steps": 34600, "total_steps": 38480, "loss": 0.1169, "lr": 1.5333860469406574e-06, "epoch": 17.983367983367984, "percentage": 89.92, "elapsed_time": "1:23:14", "remaining_time": "0:09:20", "throughput": 1320.66, "total_tokens": 6595912}
|
| 6938 |
+
{"current_steps": 34605, "total_steps": 38480, "loss": 0.0903, "lr": 1.529478347093813e-06, "epoch": 17.985966735966738, "percentage": 89.93, "elapsed_time": "1:23:15", "remaining_time": "0:09:19", "throughput": 1320.66, "total_tokens": 6596808}
|
| 6939 |
+
{"current_steps": 34610, "total_steps": 38480, "loss": 0.1952, "lr": 1.5255754756925728e-06, "epoch": 17.988565488565488, "percentage": 89.94, "elapsed_time": "1:23:15", "remaining_time": "0:09:18", "throughput": 1320.67, "total_tokens": 6597800}
|
| 6940 |
+
{"current_steps": 34615, "total_steps": 38480, "loss": 0.0934, "lr": 1.5216774335398532e-06, "epoch": 17.991164241164242, "percentage": 89.96, "elapsed_time": "1:23:16", "remaining_time": "0:09:17", "throughput": 1320.69, "total_tokens": 6598792}
|
| 6941 |
+
{"current_steps": 34620, "total_steps": 38480, "loss": 0.1356, "lr": 1.517784221437568e-06, "epoch": 17.993762993762992, "percentage": 89.97, "elapsed_time": "1:23:17", "remaining_time": "0:09:17", "throughput": 1320.69, "total_tokens": 6599752}
|
| 6942 |
+
{"current_steps": 34625, "total_steps": 38480, "loss": 0.0888, "lr": 1.513895840186655e-06, "epoch": 17.996361746361746, "percentage": 89.98, "elapsed_time": "1:23:17", "remaining_time": "0:09:16", "throughput": 1320.71, "total_tokens": 6600744}
|
| 6943 |
+
{"current_steps": 34630, "total_steps": 38480, "loss": 0.109, "lr": 1.5100122905870373e-06, "epoch": 17.9989604989605, "percentage": 89.99, "elapsed_time": "1:23:18", "remaining_time": "0:09:15", "throughput": 1320.7, "total_tokens": 6601608}
|
| 6944 |
+
{"current_steps": 34632, "total_steps": 38480, "eval_loss": 0.14623308181762695, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "1:23:26", "remaining_time": "0:09:16", "throughput": 1318.56, "total_tokens": 6601944}
|
| 6945 |
+
{"current_steps": 34635, "total_steps": 38480, "loss": 0.0397, "lr": 1.5061335734376598e-06, "epoch": 18.00155925155925, "percentage": 90.01, "elapsed_time": "1:23:28", "remaining_time": "0:09:16", "throughput": 1318.26, "total_tokens": 6602488}
|
| 6946 |
+
{"current_steps": 34640, "total_steps": 38480, "loss": 0.0782, "lr": 1.5022596895364615e-06, "epoch": 18.004158004158004, "percentage": 90.02, "elapsed_time": "1:23:29", "remaining_time": "0:09:15", "throughput": 1318.22, "total_tokens": 6603416}
|
| 6947 |
+
{"current_steps": 34645, "total_steps": 38480, "loss": 0.173, "lr": 1.4983906396803972e-06, "epoch": 18.006756756756758, "percentage": 90.03, "elapsed_time": "1:23:30", "remaining_time": "0:09:14", "throughput": 1318.22, "total_tokens": 6604376}
|