Training in progress, step 1600
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +29 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 8388736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf58ed33c0af14d1a87a2e6b18383d3eaf2c93297f233349cb078c5d46a690b1
|
| 3 |
size 8388736
|
trainer_log.jsonl
CHANGED
|
@@ -300,3 +300,32 @@
|
|
| 300 |
{"current_steps": 1455, "total_steps": 1600, "loss": 0.0002, "lr": 2.5150436899061494e-07, "epoch": 18.1875, "percentage": 90.94, "elapsed_time": "0:02:31", "remaining_time": "0:00:15", "throughput": 3000.23, "total_tokens": 455936}
|
| 301 |
{"current_steps": 1460, "total_steps": 1600, "loss": 0.0002, "lr": 2.3470678346851517e-07, "epoch": 18.25, "percentage": 91.25, "elapsed_time": "0:02:32", "remaining_time": "0:00:14", "throughput": 3001.92, "total_tokens": 457536}
|
| 302 |
{"current_steps": 1465, "total_steps": 1600, "loss": 0.0001, "lr": 2.1847622018482283e-07, "epoch": 18.3125, "percentage": 91.56, "elapsed_time": "0:02:32", "remaining_time": "0:00:14", "throughput": 3003.26, "total_tokens": 459072}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
{"current_steps": 1455, "total_steps": 1600, "loss": 0.0002, "lr": 2.5150436899061494e-07, "epoch": 18.1875, "percentage": 90.94, "elapsed_time": "0:02:31", "remaining_time": "0:00:15", "throughput": 3000.23, "total_tokens": 455936}
|
| 301 |
{"current_steps": 1460, "total_steps": 1600, "loss": 0.0002, "lr": 2.3470678346851517e-07, "epoch": 18.25, "percentage": 91.25, "elapsed_time": "0:02:32", "remaining_time": "0:00:14", "throughput": 3001.92, "total_tokens": 457536}
|
| 302 |
{"current_steps": 1465, "total_steps": 1600, "loss": 0.0001, "lr": 2.1847622018482283e-07, "epoch": 18.3125, "percentage": 91.56, "elapsed_time": "0:02:32", "remaining_time": "0:00:14", "throughput": 3003.26, "total_tokens": 459072}
|
| 303 |
+
{"current_steps": 1470, "total_steps": 1600, "loss": 0.0001, "lr": 2.028146104142581e-07, "epoch": 18.375, "percentage": 91.88, "elapsed_time": "0:02:33", "remaining_time": "0:00:13", "throughput": 3004.72, "total_tokens": 460640}
|
| 304 |
+
{"current_steps": 1475, "total_steps": 1600, "loss": 0.0002, "lr": 1.8772381773176417e-07, "epoch": 18.4375, "percentage": 92.19, "elapsed_time": "0:02:33", "remaining_time": "0:00:13", "throughput": 3006.21, "total_tokens": 462208}
|
| 305 |
+
{"current_steps": 1480, "total_steps": 1600, "loss": 0.0001, "lr": 1.7320563779075595e-07, "epoch": 18.5, "percentage": 92.5, "elapsed_time": "0:02:34", "remaining_time": "0:00:12", "throughput": 3007.54, "total_tokens": 463744}
|
| 306 |
+
{"current_steps": 1485, "total_steps": 1600, "loss": 0.0002, "lr": 1.5926179810946185e-07, "epoch": 18.5625, "percentage": 92.81, "elapsed_time": "0:02:34", "remaining_time": "0:00:11", "throughput": 3009.21, "total_tokens": 465344}
|
| 307 |
+
{"current_steps": 1490, "total_steps": 1600, "loss": 0.0001, "lr": 1.4589395786535954e-07, "epoch": 18.625, "percentage": 93.12, "elapsed_time": "0:02:35", "remaining_time": "0:00:11", "throughput": 3010.51, "total_tokens": 466880}
|
| 308 |
+
{"current_steps": 1495, "total_steps": 1600, "loss": 0.0002, "lr": 1.331037076977576e-07, "epoch": 18.6875, "percentage": 93.44, "elapsed_time": "0:02:35", "remaining_time": "0:00:10", "throughput": 3011.99, "total_tokens": 468448}
|
| 309 |
+
{"current_steps": 1500, "total_steps": 1600, "loss": 0.0002, "lr": 1.2089256951851923e-07, "epoch": 18.75, "percentage": 93.75, "elapsed_time": "0:02:35", "remaining_time": "0:00:10", "throughput": 3013.63, "total_tokens": 470048}
|
| 310 |
+
{"current_steps": 1505, "total_steps": 1600, "loss": 0.0001, "lr": 1.0926199633097156e-07, "epoch": 18.8125, "percentage": 94.06, "elapsed_time": "0:02:36", "remaining_time": "0:00:09", "throughput": 3014.93, "total_tokens": 471584}
|
| 311 |
+
{"current_steps": 1510, "total_steps": 1600, "loss": 0.0002, "lr": 9.821337205701664e-08, "epoch": 18.875, "percentage": 94.38, "elapsed_time": "0:02:36", "remaining_time": "0:00:09", "throughput": 3016.37, "total_tokens": 473152}
|
| 312 |
+
{"current_steps": 1515, "total_steps": 1600, "loss": 0.0004, "lr": 8.77480113724516e-08, "epoch": 18.9375, "percentage": 94.69, "elapsed_time": "0:02:37", "remaining_time": "0:00:08", "throughput": 3017.8, "total_tokens": 474720}
|
| 313 |
+
{"current_steps": 1520, "total_steps": 1600, "loss": 0.0001, "lr": 7.786715955054202e-08, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:02:37", "remaining_time": "0:00:08", "throughput": 3018.76, "total_tokens": 476320}
|
| 314 |
+
{"current_steps": 1525, "total_steps": 1600, "loss": 0.0002, "lr": 6.857199231384282e-08, "epoch": 19.0625, "percentage": 95.31, "elapsed_time": "0:02:38", "remaining_time": "0:00:07", "throughput": 3018.56, "total_tokens": 477824}
|
| 315 |
+
{"current_steps": 1530, "total_steps": 1600, "loss": 0.0002, "lr": 5.986361569430166e-08, "epoch": 19.125, "percentage": 95.62, "elapsed_time": "0:02:38", "remaining_time": "0:00:07", "throughput": 3019.8, "total_tokens": 479360}
|
| 316 |
+
{"current_steps": 1535, "total_steps": 1600, "loss": 0.0003, "lr": 5.174306590164879e-08, "epoch": 19.1875, "percentage": 95.94, "elapsed_time": "0:02:39", "remaining_time": "0:00:06", "throughput": 3021.04, "total_tokens": 480896}
|
| 317 |
+
{"current_steps": 1540, "total_steps": 1600, "loss": 0.0002, "lr": 4.42113092001023e-08, "epoch": 19.25, "percentage": 96.25, "elapsed_time": "0:02:39", "remaining_time": "0:00:06", "throughput": 3022.45, "total_tokens": 482464}
|
| 318 |
+
{"current_steps": 1545, "total_steps": 1600, "loss": 0.0001, "lr": 3.726924179339009e-08, "epoch": 19.3125, "percentage": 96.56, "elapsed_time": "0:02:40", "remaining_time": "0:00:05", "throughput": 3023.84, "total_tokens": 484032}
|
| 319 |
+
{"current_steps": 1550, "total_steps": 1600, "loss": 0.0001, "lr": 3.09176897181096e-08, "epoch": 19.375, "percentage": 96.88, "elapsed_time": "0:02:40", "remaining_time": "0:00:05", "throughput": 3025.41, "total_tokens": 485632}
|
| 320 |
+
{"current_steps": 1555, "total_steps": 1600, "loss": 0.0002, "lr": 2.515740874544148e-08, "epoch": 19.4375, "percentage": 97.19, "elapsed_time": "0:02:40", "remaining_time": "0:00:04", "throughput": 3026.97, "total_tokens": 487232}
|
| 321 |
+
{"current_steps": 1560, "total_steps": 1600, "loss": 0.0002, "lr": 1.9989084291216487e-08, "epoch": 19.5, "percentage": 97.5, "elapsed_time": "0:02:41", "remaining_time": "0:00:04", "throughput": 3028.5, "total_tokens": 488832}
|
| 322 |
+
{"current_steps": 1565, "total_steps": 1600, "loss": 0.0001, "lr": 1.541333133436018e-08, "epoch": 19.5625, "percentage": 97.81, "elapsed_time": "0:02:41", "remaining_time": "0:00:03", "throughput": 3029.86, "total_tokens": 490400}
|
| 323 |
+
{"current_steps": 1570, "total_steps": 1600, "loss": 0.0001, "lr": 1.1430694343715354e-08, "epoch": 19.625, "percentage": 98.12, "elapsed_time": "0:02:42", "remaining_time": "0:00:03", "throughput": 3031.05, "total_tokens": 491936}
|
| 324 |
+
{"current_steps": 1575, "total_steps": 1600, "loss": 0.0002, "lr": 8.041647213256066e-09, "epoch": 19.6875, "percentage": 98.44, "elapsed_time": "0:02:42", "remaining_time": "0:00:02", "throughput": 3032.4, "total_tokens": 493504}
|
| 325 |
+
{"current_steps": 1580, "total_steps": 1600, "loss": 0.0001, "lr": 5.246593205699424e-09, "epoch": 19.75, "percentage": 98.75, "elapsed_time": "0:02:43", "remaining_time": "0:00:02", "throughput": 3033.92, "total_tokens": 495104}
|
| 326 |
+
{"current_steps": 1585, "total_steps": 1600, "loss": 0.0002, "lr": 3.0458649045211897e-09, "epoch": 19.8125, "percentage": 99.06, "elapsed_time": "0:02:43", "remaining_time": "0:00:01", "throughput": 3035.24, "total_tokens": 496672}
|
| 327 |
+
{"current_steps": 1590, "total_steps": 1600, "loss": 0.0001, "lr": 1.4397241743813185e-09, "epoch": 19.875, "percentage": 99.38, "elapsed_time": "0:02:44", "remaining_time": "0:00:01", "throughput": 3036.57, "total_tokens": 498240}
|
| 328 |
+
{"current_steps": 1595, "total_steps": 1600, "loss": 0.0002, "lr": 4.283621299649987e-10, "epoch": 19.9375, "percentage": 99.69, "elapsed_time": "0:02:44", "remaining_time": "0:00:00", "throughput": 3037.9, "total_tokens": 499808}
|
| 329 |
+
{"current_steps": 1600, "total_steps": 1600, "loss": 0.0003, "lr": 1.189911324084303e-11, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:02:45", "remaining_time": "0:00:00", "throughput": 3038.86, "total_tokens": 501440}
|
| 330 |
+
{"current_steps": 1600, "total_steps": 1600, "eval_loss": 1.093362808227539, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:02:45", "remaining_time": "0:00:00", "throughput": 3021.98, "total_tokens": 501440}
|
| 331 |
+
{"current_steps": 1600, "total_steps": 1600, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:02:47", "remaining_time": "0:00:00", "throughput": 3000.57, "total_tokens": 501440}
|