Training in progress, step 39800
Browse files- trainer_log.jsonl +41 -0
trainer_log.jsonl
CHANGED
|
@@ -8116,3 +8116,44 @@
|
|
| 8116 |
{"current_steps": 39595, "total_steps": 40000, "loss": 0.0, "lr": 1.2708814586862016e-08, "epoch": 439.94444444444446, "percentage": 98.99, "elapsed_time": "3:56:50", "remaining_time": "0:02:25", "throughput": 780.24, "total_tokens": 11087296}
|
| 8117 |
{"current_steps": 39600, "total_steps": 40000, "loss": 0.0, "lr": 1.2397742806111168e-08, "epoch": 440.0, "percentage": 99.0, "elapsed_time": "3:56:51", "remaining_time": "0:02:23", "throughput": 780.24, "total_tokens": 11088720}
|
| 8118 |
{"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.907706618309021, "epoch": 440.0, "percentage": 99.0, "elapsed_time": "3:56:53", "remaining_time": "0:02:23", "throughput": 780.17, "total_tokens": 11088720}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8116 |
{"current_steps": 39595, "total_steps": 40000, "loss": 0.0, "lr": 1.2708814586862016e-08, "epoch": 439.94444444444446, "percentage": 98.99, "elapsed_time": "3:56:50", "remaining_time": "0:02:25", "throughput": 780.24, "total_tokens": 11087296}
|
| 8117 |
{"current_steps": 39600, "total_steps": 40000, "loss": 0.0, "lr": 1.2397742806111168e-08, "epoch": 440.0, "percentage": 99.0, "elapsed_time": "3:56:51", "remaining_time": "0:02:23", "throughput": 780.24, "total_tokens": 11088720}
|
| 8118 |
{"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.907706618309021, "epoch": 440.0, "percentage": 99.0, "elapsed_time": "3:56:53", "remaining_time": "0:02:23", "throughput": 780.17, "total_tokens": 11088720}
|
| 8119 |
+
{"current_steps": 39605, "total_steps": 40000, "loss": 0.0, "lr": 1.209052442764369e-08, "epoch": 440.05555555555554, "percentage": 99.01, "elapsed_time": "3:56:56", "remaining_time": "0:02:21", "throughput": 780.07, "total_tokens": 11090144}
|
| 8120 |
+
{"current_steps": 39610, "total_steps": 40000, "loss": 0.0, "lr": 1.17871594988328e-08, "epoch": 440.1111111111111, "percentage": 99.02, "elapsed_time": "3:56:58", "remaining_time": "0:02:19", "throughput": 780.07, "total_tokens": 11091552}
|
| 8121 |
+
{"current_steps": 39615, "total_steps": 40000, "loss": 0.0, "lr": 1.1487648066466072e-08, "epoch": 440.1666666666667, "percentage": 99.04, "elapsed_time": "3:57:00", "remaining_time": "0:02:18", "throughput": 780.08, "total_tokens": 11092912}
|
| 8122 |
+
{"current_steps": 39620, "total_steps": 40000, "loss": 0.0, "lr": 1.1191990176728784e-08, "epoch": 440.22222222222223, "percentage": 99.05, "elapsed_time": "3:57:02", "remaining_time": "0:02:16", "throughput": 780.08, "total_tokens": 11094320}
|
| 8123 |
+
{"current_steps": 39625, "total_steps": 40000, "loss": 0.0, "lr": 1.0900185875215018e-08, "epoch": 440.27777777777777, "percentage": 99.06, "elapsed_time": "3:57:03", "remaining_time": "0:02:14", "throughput": 780.08, "total_tokens": 11095696}
|
| 8124 |
+
{"current_steps": 39630, "total_steps": 40000, "loss": 0.0, "lr": 1.0612235206924891e-08, "epoch": 440.3333333333333, "percentage": 99.08, "elapsed_time": "3:57:05", "remaining_time": "0:02:12", "throughput": 780.09, "total_tokens": 11097088}
|
| 8125 |
+
{"current_steps": 39635, "total_steps": 40000, "loss": 0.0, "lr": 1.0328138216264549e-08, "epoch": 440.3888888888889, "percentage": 99.09, "elapsed_time": "3:57:07", "remaining_time": "0:02:11", "throughput": 780.1, "total_tokens": 11098544}
|
| 8126 |
+
{"current_steps": 39640, "total_steps": 40000, "loss": 0.0, "lr": 1.004789494704339e-08, "epoch": 440.44444444444446, "percentage": 99.1, "elapsed_time": "3:57:08", "remaining_time": "0:02:09", "throughput": 780.1, "total_tokens": 11099952}
|
| 8127 |
+
{"current_steps": 39645, "total_steps": 40000, "loss": 0.0, "lr": 9.771505442482397e-09, "epoch": 440.5, "percentage": 99.11, "elapsed_time": "3:57:10", "remaining_time": "0:02:07", "throughput": 780.11, "total_tokens": 11101376}
|
| 8128 |
+
{"current_steps": 39650, "total_steps": 40000, "loss": 0.0, "lr": 9.498969745200259e-09, "epoch": 440.55555555555554, "percentage": 99.12, "elapsed_time": "3:57:12", "remaining_time": "0:02:05", "throughput": 780.11, "total_tokens": 11102752}
|
| 8129 |
+
{"current_steps": 39655, "total_steps": 40000, "loss": 0.0, "lr": 9.230287897230017e-09, "epoch": 440.6111111111111, "percentage": 99.14, "elapsed_time": "3:57:13", "remaining_time": "0:02:03", "throughput": 780.12, "total_tokens": 11104128}
|
| 8130 |
+
{"current_steps": 39660, "total_steps": 40000, "loss": 0.0, "lr": 8.965459940002419e-09, "epoch": 440.6666666666667, "percentage": 99.15, "elapsed_time": "3:57:15", "remaining_time": "0:02:02", "throughput": 780.12, "total_tokens": 11105504}
|
| 8131 |
+
{"current_steps": 39665, "total_steps": 40000, "loss": 0.0, "lr": 8.704485914357019e-09, "epoch": 440.72222222222223, "percentage": 99.16, "elapsed_time": "3:57:17", "remaining_time": "0:02:00", "throughput": 780.13, "total_tokens": 11106928}
|
| 8132 |
+
{"current_steps": 39670, "total_steps": 40000, "loss": 0.0, "lr": 8.447365860539402e-09, "epoch": 440.77777777777777, "percentage": 99.17, "elapsed_time": "3:57:19", "remaining_time": "0:01:58", "throughput": 780.13, "total_tokens": 11108304}
|
| 8133 |
+
{"current_steps": 39675, "total_steps": 40000, "loss": 0.0, "lr": 8.194099818201184e-09, "epoch": 440.8333333333333, "percentage": 99.19, "elapsed_time": "3:57:20", "remaining_time": "0:01:56", "throughput": 780.14, "total_tokens": 11109696}
|
| 8134 |
+
{"current_steps": 39680, "total_steps": 40000, "loss": 0.0, "lr": 7.944687826400011e-09, "epoch": 440.8888888888889, "percentage": 99.2, "elapsed_time": "3:57:22", "remaining_time": "0:01:54", "throughput": 780.14, "total_tokens": 11111104}
|
| 8135 |
+
{"current_steps": 39685, "total_steps": 40000, "loss": 0.0, "lr": 7.699129923599557e-09, "epoch": 440.94444444444446, "percentage": 99.21, "elapsed_time": "3:57:24", "remaining_time": "0:01:53", "throughput": 780.15, "total_tokens": 11112496}
|
| 8136 |
+
{"current_steps": 39690, "total_steps": 40000, "loss": 0.0, "lr": 7.457426147663982e-09, "epoch": 441.0, "percentage": 99.22, "elapsed_time": "3:57:25", "remaining_time": "0:01:51", "throughput": 780.14, "total_tokens": 11113888}
|
| 8137 |
+
{"current_steps": 39695, "total_steps": 40000, "loss": 0.0, "lr": 7.219576535871797e-09, "epoch": 441.05555555555554, "percentage": 99.24, "elapsed_time": "3:57:27", "remaining_time": "0:01:49", "throughput": 780.13, "total_tokens": 11115264}
|
| 8138 |
+
{"current_steps": 39700, "total_steps": 40000, "loss": 0.0, "lr": 6.985581124896445e-09, "epoch": 441.1111111111111, "percentage": 99.25, "elapsed_time": "3:57:29", "remaining_time": "0:01:47", "throughput": 780.14, "total_tokens": 11116672}
|
| 8139 |
+
{"current_steps": 39705, "total_steps": 40000, "loss": 0.0, "lr": 6.755439950828501e-09, "epoch": 441.1666666666667, "percentage": 99.26, "elapsed_time": "3:57:31", "remaining_time": "0:01:45", "throughput": 780.14, "total_tokens": 11118096}
|
| 8140 |
+
{"current_steps": 39710, "total_steps": 40000, "loss": 0.0, "lr": 6.5291530491562444e-09, "epoch": 441.22222222222223, "percentage": 99.28, "elapsed_time": "3:57:33", "remaining_time": "0:01:44", "throughput": 780.15, "total_tokens": 11119520}
|
| 8141 |
+
{"current_steps": 39715, "total_steps": 40000, "loss": 0.0, "lr": 6.3067204547739845e-09, "epoch": 441.27777777777777, "percentage": 99.29, "elapsed_time": "3:57:34", "remaining_time": "0:01:42", "throughput": 780.16, "total_tokens": 11120912}
|
| 8142 |
+
{"current_steps": 39720, "total_steps": 40000, "loss": 0.0, "lr": 6.088142201987612e-09, "epoch": 441.3333333333333, "percentage": 99.3, "elapsed_time": "3:57:36", "remaining_time": "0:01:40", "throughput": 780.16, "total_tokens": 11122320}
|
| 8143 |
+
{"current_steps": 39725, "total_steps": 40000, "loss": 0.0, "lr": 5.873418324503499e-09, "epoch": 441.3888888888889, "percentage": 99.31, "elapsed_time": "3:57:38", "remaining_time": "0:01:38", "throughput": 780.17, "total_tokens": 11123728}
|
| 8144 |
+
{"current_steps": 39730, "total_steps": 40000, "loss": 0.0, "lr": 5.6625488554340465e-09, "epoch": 441.44444444444446, "percentage": 99.33, "elapsed_time": "3:57:39", "remaining_time": "0:01:36", "throughput": 780.18, "total_tokens": 11125168}
|
| 8145 |
+
{"current_steps": 39735, "total_steps": 40000, "loss": 0.0, "lr": 5.455533827297688e-09, "epoch": 441.5, "percentage": 99.34, "elapsed_time": "3:57:41", "remaining_time": "0:01:35", "throughput": 780.18, "total_tokens": 11126576}
|
| 8146 |
+
{"current_steps": 39740, "total_steps": 40000, "loss": 0.0, "lr": 5.252373272018885e-09, "epoch": 441.55555555555554, "percentage": 99.35, "elapsed_time": "3:57:43", "remaining_time": "0:01:33", "throughput": 780.19, "total_tokens": 11127952}
|
| 8147 |
+
{"current_steps": 39745, "total_steps": 40000, "loss": 0.0, "lr": 5.053067220925356e-09, "epoch": 441.6111111111111, "percentage": 99.36, "elapsed_time": "3:57:44", "remaining_time": "0:01:31", "throughput": 780.19, "total_tokens": 11129360}
|
| 8148 |
+
{"current_steps": 39750, "total_steps": 40000, "loss": 0.0, "lr": 4.857615704759177e-09, "epoch": 441.6666666666667, "percentage": 99.38, "elapsed_time": "3:57:46", "remaining_time": "0:01:29", "throughput": 780.2, "total_tokens": 11130736}
|
| 8149 |
+
{"current_steps": 39755, "total_steps": 40000, "loss": 0.0, "lr": 4.666018753654577e-09, "epoch": 441.72222222222223, "percentage": 99.39, "elapsed_time": "3:57:48", "remaining_time": "0:01:27", "throughput": 780.19, "total_tokens": 11132144}
|
| 8150 |
+
{"current_steps": 39760, "total_steps": 40000, "loss": 0.0, "lr": 4.478276397162917e-09, "epoch": 441.77777777777777, "percentage": 99.4, "elapsed_time": "3:57:50", "remaining_time": "0:01:26", "throughput": 780.19, "total_tokens": 11133552}
|
| 8151 |
+
{"current_steps": 39765, "total_steps": 40000, "loss": 0.0, "lr": 4.294388664233262e-09, "epoch": 441.8333333333333, "percentage": 99.41, "elapsed_time": "3:57:51", "remaining_time": "0:01:24", "throughput": 780.19, "total_tokens": 11134912}
|
| 8152 |
+
{"current_steps": 39770, "total_steps": 40000, "loss": 0.0, "lr": 4.114355583223484e-09, "epoch": 441.8888888888889, "percentage": 99.42, "elapsed_time": "3:57:53", "remaining_time": "0:01:22", "throughput": 780.2, "total_tokens": 11136336}
|
| 8153 |
+
{"current_steps": 39775, "total_steps": 40000, "loss": 0.0, "lr": 3.9381771818974845e-09, "epoch": 441.94444444444446, "percentage": 99.44, "elapsed_time": "3:57:55", "remaining_time": "0:01:20", "throughput": 780.21, "total_tokens": 11137744}
|
| 8154 |
+
{"current_steps": 39780, "total_steps": 40000, "loss": 0.0, "lr": 3.765853487427973e-09, "epoch": 442.0, "percentage": 99.45, "elapsed_time": "3:57:57", "remaining_time": "0:01:18", "throughput": 780.2, "total_tokens": 11139120}
|
| 8155 |
+
{"current_steps": 39785, "total_steps": 40000, "loss": 0.0, "lr": 3.5973845263825857e-09, "epoch": 442.05555555555554, "percentage": 99.46, "elapsed_time": "3:57:59", "remaining_time": "0:01:17", "throughput": 780.2, "total_tokens": 11140544}
|
| 8156 |
+
{"current_steps": 39790, "total_steps": 40000, "loss": 0.0, "lr": 3.4327703247488684e-09, "epoch": 442.1111111111111, "percentage": 99.48, "elapsed_time": "3:58:00", "remaining_time": "0:01:15", "throughput": 780.2, "total_tokens": 11141920}
|
| 8157 |
+
{"current_steps": 39795, "total_steps": 40000, "loss": 0.0, "lr": 3.2720109079037443e-09, "epoch": 442.1666666666667, "percentage": 99.49, "elapsed_time": "3:58:02", "remaining_time": "0:01:13", "throughput": 780.21, "total_tokens": 11143312}
|
| 8158 |
+
{"current_steps": 39800, "total_steps": 40000, "loss": 0.0, "lr": 3.1151063006468193e-09, "epoch": 442.22222222222223, "percentage": 99.5, "elapsed_time": "3:58:04", "remaining_time": "0:01:11", "throughput": 780.21, "total_tokens": 11144688}
|
| 8159 |
+
{"current_steps": 39800, "total_steps": 40000, "eval_loss": 0.907706618309021, "epoch": 442.22222222222223, "percentage": 99.5, "elapsed_time": "3:58:05", "remaining_time": "0:01:11", "throughput": 780.14, "total_tokens": 11144688}
|