Training in progress, step 39800
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +41 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 18124968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e094b83acd2083698d2c5ad6779656a3e2eec075bfc887240e14f3fabd9b129c
|
| 3 |
size 18124968
|
trainer_log.jsonl
CHANGED
|
@@ -8117,3 +8117,44 @@
|
|
| 8117 |
{"current_steps": 39600, "total_steps": 40000, "loss": 0.315, "lr": 1.2397742806111168e-08, "epoch": 1.2675244862684847, "percentage": 99.0, "elapsed_time": "1 day, 2:06:58", "remaining_time": "0:15:49", "throughput": 579.19, "total_tokens": 54454880}
|
| 8118 |
{"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.41838592290878296, "epoch": 1.2675244862684847, "percentage": 99.0, "elapsed_time": "1 day, 2:13:32", "remaining_time": "0:15:53", "throughput": 576.78, "total_tokens": 54454880}
|
| 8119 |
{"current_steps": 39605, "total_steps": 40000, "loss": 0.316, "lr": 1.209052442764369e-08, "epoch": 1.2676845272389732, "percentage": 99.01, "elapsed_time": "1 day, 2:13:36", "remaining_time": "0:15:41", "throughput": 576.83, "total_tokens": 54462032}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8117 |
{"current_steps": 39600, "total_steps": 40000, "loss": 0.315, "lr": 1.2397742806111168e-08, "epoch": 1.2675244862684847, "percentage": 99.0, "elapsed_time": "1 day, 2:06:58", "remaining_time": "0:15:49", "throughput": 579.19, "total_tokens": 54454880}
|
| 8118 |
{"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.41838592290878296, "epoch": 1.2675244862684847, "percentage": 99.0, "elapsed_time": "1 day, 2:13:32", "remaining_time": "0:15:53", "throughput": 576.78, "total_tokens": 54454880}
|
| 8119 |
{"current_steps": 39605, "total_steps": 40000, "loss": 0.316, "lr": 1.209052442764369e-08, "epoch": 1.2676845272389732, "percentage": 99.01, "elapsed_time": "1 day, 2:13:36", "remaining_time": "0:15:41", "throughput": 576.83, "total_tokens": 54462032}
|
| 8120 |
+
{"current_steps": 39610, "total_steps": 40000, "loss": 0.3079, "lr": 1.17871594988328e-08, "epoch": 1.2678445682094617, "percentage": 99.02, "elapsed_time": "1 day, 2:13:38", "remaining_time": "0:15:29", "throughput": 576.89, "total_tokens": 54468704}
|
| 8121 |
+
{"current_steps": 39615, "total_steps": 40000, "loss": 0.2138, "lr": 1.1487648066466072e-08, "epoch": 1.26800460917995, "percentage": 99.04, "elapsed_time": "1 day, 2:13:40", "remaining_time": "0:15:17", "throughput": 576.94, "total_tokens": 54475296}
|
| 8122 |
+
{"current_steps": 39620, "total_steps": 40000, "loss": 0.3969, "lr": 1.1191990176728784e-08, "epoch": 1.2681646501504384, "percentage": 99.05, "elapsed_time": "1 day, 2:13:42", "remaining_time": "0:15:05", "throughput": 577.0, "total_tokens": 54481920}
|
| 8123 |
+
{"current_steps": 39625, "total_steps": 40000, "loss": 0.365, "lr": 1.0900185875215018e-08, "epoch": 1.268324691120927, "percentage": 99.06, "elapsed_time": "1 day, 2:13:44", "remaining_time": "0:14:53", "throughput": 577.06, "total_tokens": 54488688}
|
| 8124 |
+
{"current_steps": 39630, "total_steps": 40000, "loss": 0.151, "lr": 1.0612235206924891e-08, "epoch": 1.2684847320914154, "percentage": 99.08, "elapsed_time": "1 day, 2:13:46", "remaining_time": "0:14:41", "throughput": 577.12, "total_tokens": 54495296}
|
| 8125 |
+
{"current_steps": 39635, "total_steps": 40000, "loss": 0.3231, "lr": 1.0328138216264549e-08, "epoch": 1.268644773061904, "percentage": 99.09, "elapsed_time": "1 day, 2:13:48", "remaining_time": "0:14:29", "throughput": 577.18, "total_tokens": 54502112}
|
| 8126 |
+
{"current_steps": 39640, "total_steps": 40000, "loss": 0.3474, "lr": 1.004789494704339e-08, "epoch": 1.2688048140323924, "percentage": 99.1, "elapsed_time": "1 day, 2:13:50", "remaining_time": "0:14:17", "throughput": 577.24, "total_tokens": 54508928}
|
| 8127 |
+
{"current_steps": 39645, "total_steps": 40000, "loss": 0.3912, "lr": 9.771505442482397e-09, "epoch": 1.2689648550028807, "percentage": 99.11, "elapsed_time": "1 day, 2:13:52", "remaining_time": "0:14:05", "throughput": 577.29, "total_tokens": 54515536}
|
| 8128 |
+
{"current_steps": 39650, "total_steps": 40000, "loss": 0.3672, "lr": 9.498969745200259e-09, "epoch": 1.2691248959733692, "percentage": 99.12, "elapsed_time": "1 day, 2:13:54", "remaining_time": "0:13:53", "throughput": 577.36, "total_tokens": 54522912}
|
| 8129 |
+
{"current_steps": 39655, "total_steps": 40000, "loss": 0.346, "lr": 9.230287897230017e-09, "epoch": 1.2692849369438577, "percentage": 99.14, "elapsed_time": "1 day, 2:13:56", "remaining_time": "0:13:41", "throughput": 577.42, "total_tokens": 54529600}
|
| 8130 |
+
{"current_steps": 39660, "total_steps": 40000, "loss": 0.283, "lr": 8.965459940002419e-09, "epoch": 1.269444977914346, "percentage": 99.15, "elapsed_time": "1 day, 2:13:58", "remaining_time": "0:13:29", "throughput": 577.48, "total_tokens": 54536256}
|
| 8131 |
+
{"current_steps": 39665, "total_steps": 40000, "loss": 0.2666, "lr": 8.704485914357019e-09, "epoch": 1.2696050188848345, "percentage": 99.16, "elapsed_time": "1 day, 2:14:01", "remaining_time": "0:13:17", "throughput": 577.54, "total_tokens": 54543056}
|
| 8132 |
+
{"current_steps": 39670, "total_steps": 40000, "loss": 0.3133, "lr": 8.447365860539402e-09, "epoch": 1.269765059855323, "percentage": 99.17, "elapsed_time": "1 day, 2:14:03", "remaining_time": "0:13:05", "throughput": 577.6, "total_tokens": 54550016}
|
| 8133 |
+
{"current_steps": 39675, "total_steps": 40000, "loss": 0.322, "lr": 8.194099818201184e-09, "epoch": 1.2699251008258114, "percentage": 99.19, "elapsed_time": "1 day, 2:14:05", "remaining_time": "0:12:53", "throughput": 577.66, "total_tokens": 54556960}
|
| 8134 |
+
{"current_steps": 39680, "total_steps": 40000, "loss": 0.4901, "lr": 7.944687826400011e-09, "epoch": 1.2700851417963, "percentage": 99.2, "elapsed_time": "1 day, 2:14:07", "remaining_time": "0:12:41", "throughput": 577.72, "total_tokens": 54563760}
|
| 8135 |
+
{"current_steps": 39685, "total_steps": 40000, "loss": 0.3768, "lr": 7.699129923599557e-09, "epoch": 1.2702451827667882, "percentage": 99.21, "elapsed_time": "1 day, 2:14:09", "remaining_time": "0:12:29", "throughput": 577.78, "total_tokens": 54570768}
|
| 8136 |
+
{"current_steps": 39690, "total_steps": 40000, "loss": 0.4372, "lr": 7.457426147663982e-09, "epoch": 1.2704052237372767, "percentage": 99.22, "elapsed_time": "1 day, 2:14:11", "remaining_time": "0:12:17", "throughput": 577.84, "total_tokens": 54577712}
|
| 8137 |
+
{"current_steps": 39695, "total_steps": 40000, "loss": 0.5296, "lr": 7.219576535871797e-09, "epoch": 1.2705652647077652, "percentage": 99.24, "elapsed_time": "1 day, 2:14:13", "remaining_time": "0:12:05", "throughput": 577.9, "total_tokens": 54584176}
|
| 8138 |
+
{"current_steps": 39700, "total_steps": 40000, "loss": 0.3758, "lr": 6.985581124896445e-09, "epoch": 1.2707253056782537, "percentage": 99.25, "elapsed_time": "1 day, 2:14:15", "remaining_time": "0:11:53", "throughput": 577.95, "total_tokens": 54590512}
|
| 8139 |
+
{"current_steps": 39705, "total_steps": 40000, "loss": 0.4265, "lr": 6.755439950828501e-09, "epoch": 1.270885346648742, "percentage": 99.26, "elapsed_time": "1 day, 2:14:17", "remaining_time": "0:11:41", "throughput": 578.01, "total_tokens": 54597568}
|
| 8140 |
+
{"current_steps": 39710, "total_steps": 40000, "loss": 0.3209, "lr": 6.5291530491562444e-09, "epoch": 1.2710453876192305, "percentage": 99.28, "elapsed_time": "1 day, 2:14:19", "remaining_time": "0:11:29", "throughput": 578.07, "total_tokens": 54604624}
|
| 8141 |
+
{"current_steps": 39715, "total_steps": 40000, "loss": 0.5058, "lr": 6.3067204547739845e-09, "epoch": 1.271205428589719, "percentage": 99.29, "elapsed_time": "1 day, 2:14:21", "remaining_time": "0:11:17", "throughput": 578.13, "total_tokens": 54611008}
|
| 8142 |
+
{"current_steps": 39720, "total_steps": 40000, "loss": 0.4093, "lr": 6.088142201987612e-09, "epoch": 1.2713654695602075, "percentage": 99.3, "elapsed_time": "1 day, 2:14:23", "remaining_time": "0:11:05", "throughput": 578.19, "total_tokens": 54618160}
|
| 8143 |
+
{"current_steps": 39725, "total_steps": 40000, "loss": 0.378, "lr": 5.873418324503499e-09, "epoch": 1.271525510530696, "percentage": 99.31, "elapsed_time": "1 day, 2:14:25", "remaining_time": "0:10:53", "throughput": 578.25, "total_tokens": 54624560}
|
| 8144 |
+
{"current_steps": 39730, "total_steps": 40000, "loss": 0.2481, "lr": 5.6625488554340465e-09, "epoch": 1.2716855515011842, "percentage": 99.33, "elapsed_time": "1 day, 2:14:27", "remaining_time": "0:10:41", "throughput": 578.3, "total_tokens": 54630960}
|
| 8145 |
+
{"current_steps": 39735, "total_steps": 40000, "loss": 0.3608, "lr": 5.455533827297688e-09, "epoch": 1.2718455924716727, "percentage": 99.34, "elapsed_time": "1 day, 2:14:29", "remaining_time": "0:10:30", "throughput": 578.36, "total_tokens": 54637952}
|
| 8146 |
+
{"current_steps": 39740, "total_steps": 40000, "loss": 0.5567, "lr": 5.252373272018885e-09, "epoch": 1.2720056334421612, "percentage": 99.35, "elapsed_time": "1 day, 2:14:31", "remaining_time": "0:10:18", "throughput": 578.43, "total_tokens": 54644960}
|
| 8147 |
+
{"current_steps": 39745, "total_steps": 40000, "loss": 0.4495, "lr": 5.053067220925356e-09, "epoch": 1.2721656744126497, "percentage": 99.36, "elapsed_time": "1 day, 2:14:33", "remaining_time": "0:10:06", "throughput": 578.49, "total_tokens": 54651872}
|
| 8148 |
+
{"current_steps": 39750, "total_steps": 40000, "loss": 0.2064, "lr": 4.857615704759177e-09, "epoch": 1.272325715383138, "percentage": 99.38, "elapsed_time": "1 day, 2:14:36", "remaining_time": "0:09:54", "throughput": 578.55, "total_tokens": 54658704}
|
| 8149 |
+
{"current_steps": 39755, "total_steps": 40000, "loss": 0.4497, "lr": 4.666018753654577e-09, "epoch": 1.2724857563536265, "percentage": 99.39, "elapsed_time": "1 day, 2:14:38", "remaining_time": "0:09:42", "throughput": 578.61, "total_tokens": 54665632}
|
| 8150 |
+
{"current_steps": 39760, "total_steps": 40000, "loss": 0.3369, "lr": 4.478276397162917e-09, "epoch": 1.272645797324115, "percentage": 99.4, "elapsed_time": "1 day, 2:14:40", "remaining_time": "0:09:30", "throughput": 578.67, "total_tokens": 54672432}
|
| 8151 |
+
{"current_steps": 39765, "total_steps": 40000, "loss": 0.4007, "lr": 4.294388664233262e-09, "epoch": 1.2728058382946035, "percentage": 99.41, "elapsed_time": "1 day, 2:14:42", "remaining_time": "0:09:18", "throughput": 578.72, "total_tokens": 54679136}
|
| 8152 |
+
{"current_steps": 39770, "total_steps": 40000, "loss": 0.3231, "lr": 4.114355583223484e-09, "epoch": 1.272965879265092, "percentage": 99.42, "elapsed_time": "1 day, 2:14:44", "remaining_time": "0:09:06", "throughput": 578.78, "total_tokens": 54685456}
|
| 8153 |
+
{"current_steps": 39775, "total_steps": 40000, "loss": 0.4016, "lr": 3.9381771818974845e-09, "epoch": 1.2731259202355802, "percentage": 99.44, "elapsed_time": "1 day, 2:14:46", "remaining_time": "0:08:54", "throughput": 578.83, "total_tokens": 54691936}
|
| 8154 |
+
{"current_steps": 39780, "total_steps": 40000, "loss": 0.3896, "lr": 3.765853487427973e-09, "epoch": 1.2732859612060687, "percentage": 99.45, "elapsed_time": "1 day, 2:14:48", "remaining_time": "0:08:42", "throughput": 578.9, "total_tokens": 54698944}
|
| 8155 |
+
{"current_steps": 39785, "total_steps": 40000, "loss": 0.2715, "lr": 3.5973845263825857e-09, "epoch": 1.2734460021765572, "percentage": 99.46, "elapsed_time": "1 day, 2:14:50", "remaining_time": "0:08:30", "throughput": 578.96, "total_tokens": 54706176}
|
| 8156 |
+
{"current_steps": 39790, "total_steps": 40000, "loss": 0.3068, "lr": 3.4327703247488684e-09, "epoch": 1.2736060431470455, "percentage": 99.48, "elapsed_time": "1 day, 2:14:52", "remaining_time": "0:08:18", "throughput": 579.02, "total_tokens": 54713360}
|
| 8157 |
+
{"current_steps": 39795, "total_steps": 40000, "loss": 0.2251, "lr": 3.2720109079037443e-09, "epoch": 1.273766084117534, "percentage": 99.49, "elapsed_time": "1 day, 2:14:54", "remaining_time": "0:08:06", "throughput": 579.09, "total_tokens": 54720384}
|
| 8158 |
+
{"current_steps": 39800, "total_steps": 40000, "loss": 0.3353, "lr": 3.1151063006468193e-09, "epoch": 1.2739261250880225, "percentage": 99.5, "elapsed_time": "1 day, 2:14:56", "remaining_time": "0:07:54", "throughput": 579.15, "total_tokens": 54727600}
|
| 8159 |
+
{"current_steps": 39800, "total_steps": 40000, "eval_loss": 0.4184603989124298, "epoch": 1.2739261250880225, "percentage": 99.5, "elapsed_time": "1 day, 2:21:30", "remaining_time": "0:07:56", "throughput": 576.74, "total_tokens": 54727600}
|
| 8160 |
+
{"current_steps": 39805, "total_steps": 40000, "loss": 0.394, "lr": 2.962056527169854e-09, "epoch": 1.274086166058511, "percentage": 99.51, "elapsed_time": "1 day, 2:21:35", "remaining_time": "0:07:44", "throughput": 576.79, "total_tokens": 54734624}
|