Training in progress, step 39800
Browse files- trainer_log.jsonl +41 -0
trainer_log.jsonl
CHANGED
|
@@ -8116,3 +8116,44 @@
|
|
| 8116 |
{"current_steps": 39595, "total_steps": 40000, "loss": 0.0882, "lr": 1.2708814586862016e-08, "epoch": 82.31808731808732, "percentage": 98.99, "elapsed_time": "8:08:47", "remaining_time": "0:04:59", "throughput": 1054.89, "total_tokens": 30937000}
|
| 8117 |
{"current_steps": 39600, "total_steps": 40000, "loss": 0.1149, "lr": 1.2397742806111168e-08, "epoch": 82.32848232848232, "percentage": 99.0, "elapsed_time": "8:08:50", "remaining_time": "0:04:56", "throughput": 1054.9, "total_tokens": 30940904}
|
| 8118 |
{"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.13433800637722015, "epoch": 82.32848232848232, "percentage": 99.0, "elapsed_time": "8:09:00", "remaining_time": "0:04:56", "throughput": 1054.54, "total_tokens": 30940904}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8116 |
{"current_steps": 39595, "total_steps": 40000, "loss": 0.0882, "lr": 1.2708814586862016e-08, "epoch": 82.31808731808732, "percentage": 98.99, "elapsed_time": "8:08:47", "remaining_time": "0:04:59", "throughput": 1054.89, "total_tokens": 30937000}
|
| 8117 |
{"current_steps": 39600, "total_steps": 40000, "loss": 0.1149, "lr": 1.2397742806111168e-08, "epoch": 82.32848232848232, "percentage": 99.0, "elapsed_time": "8:08:50", "remaining_time": "0:04:56", "throughput": 1054.9, "total_tokens": 30940904}
|
| 8118 |
{"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.13433800637722015, "epoch": 82.32848232848232, "percentage": 99.0, "elapsed_time": "8:09:00", "remaining_time": "0:04:56", "throughput": 1054.54, "total_tokens": 30940904}
|
| 8119 |
+
{"current_steps": 39605, "total_steps": 40000, "loss": 0.1051, "lr": 1.209052442764369e-08, "epoch": 82.33887733887734, "percentage": 99.01, "elapsed_time": "8:09:05", "remaining_time": "0:04:52", "throughput": 1054.5, "total_tokens": 30945000}
|
| 8120 |
+
{"current_steps": 39610, "total_steps": 40000, "loss": 0.16, "lr": 1.17871594988328e-08, "epoch": 82.34927234927235, "percentage": 99.02, "elapsed_time": "8:09:09", "remaining_time": "0:04:48", "throughput": 1054.51, "total_tokens": 30948968}
|
| 8121 |
+
{"current_steps": 39615, "total_steps": 40000, "loss": 0.0926, "lr": 1.1487648066466072e-08, "epoch": 82.35966735966736, "percentage": 99.04, "elapsed_time": "8:09:12", "remaining_time": "0:04:45", "throughput": 1054.53, "total_tokens": 30953032}
|
| 8122 |
+
{"current_steps": 39620, "total_steps": 40000, "loss": 0.1342, "lr": 1.1191990176728784e-08, "epoch": 82.37006237006237, "percentage": 99.05, "elapsed_time": "8:09:15", "remaining_time": "0:04:41", "throughput": 1054.54, "total_tokens": 30956872}
|
| 8123 |
+
{"current_steps": 39625, "total_steps": 40000, "loss": 0.0886, "lr": 1.0900185875215018e-08, "epoch": 82.38045738045739, "percentage": 99.06, "elapsed_time": "8:09:19", "remaining_time": "0:04:37", "throughput": 1054.55, "total_tokens": 30960712}
|
| 8124 |
+
{"current_steps": 39630, "total_steps": 40000, "loss": 0.1008, "lr": 1.0612235206924891e-08, "epoch": 82.39085239085239, "percentage": 99.08, "elapsed_time": "8:09:22", "remaining_time": "0:04:34", "throughput": 1054.56, "total_tokens": 30964584}
|
| 8125 |
+
{"current_steps": 39635, "total_steps": 40000, "loss": 0.0889, "lr": 1.0328138216264549e-08, "epoch": 82.4012474012474, "percentage": 99.09, "elapsed_time": "8:09:26", "remaining_time": "0:04:30", "throughput": 1054.57, "total_tokens": 30968584}
|
| 8126 |
+
{"current_steps": 39640, "total_steps": 40000, "loss": 0.1399, "lr": 1.004789494704339e-08, "epoch": 82.41164241164242, "percentage": 99.1, "elapsed_time": "8:09:29", "remaining_time": "0:04:26", "throughput": 1054.58, "total_tokens": 30972392}
|
| 8127 |
+
{"current_steps": 39645, "total_steps": 40000, "loss": 0.1318, "lr": 9.771505442482397e-09, "epoch": 82.42203742203742, "percentage": 99.11, "elapsed_time": "8:09:32", "remaining_time": "0:04:23", "throughput": 1054.58, "total_tokens": 30976168}
|
| 8128 |
+
{"current_steps": 39650, "total_steps": 40000, "loss": 0.1145, "lr": 9.498969745200259e-09, "epoch": 82.43243243243244, "percentage": 99.12, "elapsed_time": "8:09:36", "remaining_time": "0:04:19", "throughput": 1054.6, "total_tokens": 30980136}
|
| 8129 |
+
{"current_steps": 39655, "total_steps": 40000, "loss": 0.1047, "lr": 9.230287897230017e-09, "epoch": 82.44282744282744, "percentage": 99.14, "elapsed_time": "8:09:39", "remaining_time": "0:04:15", "throughput": 1054.61, "total_tokens": 30984104}
|
| 8130 |
+
{"current_steps": 39660, "total_steps": 40000, "loss": 0.1432, "lr": 8.965459940002419e-09, "epoch": 82.45322245322245, "percentage": 99.15, "elapsed_time": "8:09:43", "remaining_time": "0:04:11", "throughput": 1054.62, "total_tokens": 30988072}
|
| 8131 |
+
{"current_steps": 39665, "total_steps": 40000, "loss": 0.1157, "lr": 8.704485914357019e-09, "epoch": 82.46361746361747, "percentage": 99.16, "elapsed_time": "8:09:46", "remaining_time": "0:04:08", "throughput": 1054.63, "total_tokens": 30991880}
|
| 8132 |
+
{"current_steps": 39670, "total_steps": 40000, "loss": 0.1198, "lr": 8.447365860539402e-09, "epoch": 82.47401247401247, "percentage": 99.17, "elapsed_time": "8:09:49", "remaining_time": "0:04:04", "throughput": 1054.65, "total_tokens": 30996008}
|
| 8133 |
+
{"current_steps": 39675, "total_steps": 40000, "loss": 0.1434, "lr": 8.194099818201184e-09, "epoch": 82.48440748440748, "percentage": 99.19, "elapsed_time": "8:09:53", "remaining_time": "0:04:00", "throughput": 1054.66, "total_tokens": 30999944}
|
| 8134 |
+
{"current_steps": 39680, "total_steps": 40000, "loss": 0.1451, "lr": 7.944687826400011e-09, "epoch": 82.4948024948025, "percentage": 99.2, "elapsed_time": "8:09:56", "remaining_time": "0:03:57", "throughput": 1054.67, "total_tokens": 31003784}
|
| 8135 |
+
{"current_steps": 39685, "total_steps": 40000, "loss": 0.1251, "lr": 7.699129923599557e-09, "epoch": 82.5051975051975, "percentage": 99.21, "elapsed_time": "8:10:00", "remaining_time": "0:03:53", "throughput": 1054.68, "total_tokens": 31007720}
|
| 8136 |
+
{"current_steps": 39690, "total_steps": 40000, "loss": 0.0988, "lr": 7.457426147663982e-09, "epoch": 82.51559251559252, "percentage": 99.22, "elapsed_time": "8:10:03", "remaining_time": "0:03:49", "throughput": 1054.69, "total_tokens": 31011592}
|
| 8137 |
+
{"current_steps": 39695, "total_steps": 40000, "loss": 0.1282, "lr": 7.219576535871797e-09, "epoch": 82.52598752598753, "percentage": 99.24, "elapsed_time": "8:10:06", "remaining_time": "0:03:45", "throughput": 1054.7, "total_tokens": 31015432}
|
| 8138 |
+
{"current_steps": 39700, "total_steps": 40000, "loss": 0.1034, "lr": 6.985581124896445e-09, "epoch": 82.53638253638253, "percentage": 99.25, "elapsed_time": "8:10:10", "remaining_time": "0:03:42", "throughput": 1054.71, "total_tokens": 31019176}
|
| 8139 |
+
{"current_steps": 39705, "total_steps": 40000, "loss": 0.1275, "lr": 6.755439950828501e-09, "epoch": 82.54677754677755, "percentage": 99.26, "elapsed_time": "8:10:13", "remaining_time": "0:03:38", "throughput": 1054.72, "total_tokens": 31023144}
|
| 8140 |
+
{"current_steps": 39710, "total_steps": 40000, "loss": 0.1379, "lr": 6.5291530491562444e-09, "epoch": 82.55717255717256, "percentage": 99.28, "elapsed_time": "8:10:17", "remaining_time": "0:03:34", "throughput": 1054.73, "total_tokens": 31027112}
|
| 8141 |
+
{"current_steps": 39715, "total_steps": 40000, "loss": 0.1049, "lr": 6.3067204547739845e-09, "epoch": 82.56756756756756, "percentage": 99.29, "elapsed_time": "8:10:20", "remaining_time": "0:03:31", "throughput": 1054.75, "total_tokens": 31031080}
|
| 8142 |
+
{"current_steps": 39720, "total_steps": 40000, "loss": 0.1312, "lr": 6.088142201987612e-09, "epoch": 82.57796257796258, "percentage": 99.3, "elapsed_time": "8:10:23", "remaining_time": "0:03:27", "throughput": 1054.76, "total_tokens": 31035016}
|
| 8143 |
+
{"current_steps": 39725, "total_steps": 40000, "loss": 0.1529, "lr": 5.873418324503499e-09, "epoch": 82.58835758835758, "percentage": 99.31, "elapsed_time": "8:10:27", "remaining_time": "0:03:23", "throughput": 1054.77, "total_tokens": 31038984}
|
| 8144 |
+
{"current_steps": 39730, "total_steps": 40000, "loss": 0.1597, "lr": 5.6625488554340465e-09, "epoch": 82.5987525987526, "percentage": 99.33, "elapsed_time": "8:10:30", "remaining_time": "0:03:20", "throughput": 1054.79, "total_tokens": 31042952}
|
| 8145 |
+
{"current_steps": 39735, "total_steps": 40000, "loss": 0.1126, "lr": 5.455533827297688e-09, "epoch": 82.60914760914761, "percentage": 99.34, "elapsed_time": "8:10:33", "remaining_time": "0:03:16", "throughput": 1054.79, "total_tokens": 31046696}
|
| 8146 |
+
{"current_steps": 39740, "total_steps": 40000, "loss": 0.1355, "lr": 5.252373272018885e-09, "epoch": 82.61954261954261, "percentage": 99.35, "elapsed_time": "8:10:37", "remaining_time": "0:03:12", "throughput": 1054.8, "total_tokens": 31050632}
|
| 8147 |
+
{"current_steps": 39745, "total_steps": 40000, "loss": 0.099, "lr": 5.053067220925356e-09, "epoch": 82.62993762993763, "percentage": 99.36, "elapsed_time": "8:10:40", "remaining_time": "0:03:08", "throughput": 1054.82, "total_tokens": 31054568}
|
| 8148 |
+
{"current_steps": 39750, "total_steps": 40000, "loss": 0.1343, "lr": 4.857615704759177e-09, "epoch": 82.64033264033264, "percentage": 99.38, "elapsed_time": "8:10:44", "remaining_time": "0:03:05", "throughput": 1054.82, "total_tokens": 31058408}
|
| 8149 |
+
{"current_steps": 39755, "total_steps": 40000, "loss": 0.1048, "lr": 4.666018753654577e-09, "epoch": 82.65072765072765, "percentage": 99.39, "elapsed_time": "8:10:47", "remaining_time": "0:03:01", "throughput": 1054.84, "total_tokens": 31062376}
|
| 8150 |
+
{"current_steps": 39760, "total_steps": 40000, "loss": 0.1332, "lr": 4.478276397162917e-09, "epoch": 82.66112266112266, "percentage": 99.4, "elapsed_time": "8:10:50", "remaining_time": "0:02:57", "throughput": 1054.85, "total_tokens": 31066248}
|
| 8151 |
+
{"current_steps": 39765, "total_steps": 40000, "loss": 0.1408, "lr": 4.294388664233262e-09, "epoch": 82.67151767151768, "percentage": 99.41, "elapsed_time": "8:10:54", "remaining_time": "0:02:54", "throughput": 1054.85, "total_tokens": 31070056}
|
| 8152 |
+
{"current_steps": 39770, "total_steps": 40000, "loss": 0.1169, "lr": 4.114355583223484e-09, "epoch": 82.68191268191268, "percentage": 99.42, "elapsed_time": "8:10:57", "remaining_time": "0:02:50", "throughput": 1054.86, "total_tokens": 31073928}
|
| 8153 |
+
{"current_steps": 39775, "total_steps": 40000, "loss": 0.1165, "lr": 3.9381771818974845e-09, "epoch": 82.6923076923077, "percentage": 99.44, "elapsed_time": "8:11:01", "remaining_time": "0:02:46", "throughput": 1054.88, "total_tokens": 31077896}
|
| 8154 |
+
{"current_steps": 39780, "total_steps": 40000, "loss": 0.1258, "lr": 3.765853487427973e-09, "epoch": 82.70270270270271, "percentage": 99.45, "elapsed_time": "8:11:04", "remaining_time": "0:02:42", "throughput": 1054.89, "total_tokens": 31081928}
|
| 8155 |
+
{"current_steps": 39785, "total_steps": 40000, "loss": 0.1105, "lr": 3.5973845263825857e-09, "epoch": 82.71309771309771, "percentage": 99.46, "elapsed_time": "8:11:07", "remaining_time": "0:02:39", "throughput": 1054.9, "total_tokens": 31085832}
|
| 8156 |
+
{"current_steps": 39790, "total_steps": 40000, "loss": 0.1076, "lr": 3.4327703247488684e-09, "epoch": 82.72349272349273, "percentage": 99.48, "elapsed_time": "8:11:11", "remaining_time": "0:02:35", "throughput": 1054.91, "total_tokens": 31089608}
|
| 8157 |
+
{"current_steps": 39795, "total_steps": 40000, "loss": 0.1317, "lr": 3.2720109079037443e-09, "epoch": 82.73388773388774, "percentage": 99.49, "elapsed_time": "8:11:14", "remaining_time": "0:02:31", "throughput": 1054.92, "total_tokens": 31093384}
|
| 8158 |
+
{"current_steps": 39800, "total_steps": 40000, "loss": 0.106, "lr": 3.1151063006468193e-09, "epoch": 82.74428274428274, "percentage": 99.5, "elapsed_time": "8:11:18", "remaining_time": "0:02:28", "throughput": 1054.93, "total_tokens": 31097352}
|
| 8159 |
+
{"current_steps": 39800, "total_steps": 40000, "eval_loss": 0.13433800637722015, "epoch": 82.74428274428274, "percentage": 99.5, "elapsed_time": "8:11:28", "remaining_time": "0:02:28", "throughput": 1054.57, "total_tokens": 31097352}
|