Training in progress, step 360
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +20 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 58745928
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98c0d599f98baea040f44b68356c7bc8643ce120638654c4554b3371935f22a1
|
| 3 |
size 58745928
|
trainer_log.jsonl
CHANGED
|
@@ -63,3 +63,23 @@
|
|
| 63 |
{"current_steps": 300, "total_steps": 1800, "loss": 0.0001, "lr": 4.933725658869267e-05, "epoch": 3.3333333333333335, "percentage": 16.67, "elapsed_time": "0:01:08", "remaining_time": "0:05:40", "throughput": 1382.03, "total_tokens": 94080}
|
| 64 |
{"current_steps": 305, "total_steps": 1800, "loss": 0.0003, "lr": 4.9280668057279014e-05, "epoch": 3.388888888888889, "percentage": 16.94, "elapsed_time": "0:01:09", "remaining_time": "0:05:38", "throughput": 1385.12, "total_tokens": 95680}
|
| 65 |
{"current_steps": 310, "total_steps": 1800, "loss": 0.0001, "lr": 4.9221796730647516e-05, "epoch": 3.4444444444444446, "percentage": 17.22, "elapsed_time": "0:01:10", "remaining_time": "0:05:36", "throughput": 1387.7, "total_tokens": 97248}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
{"current_steps": 300, "total_steps": 1800, "loss": 0.0001, "lr": 4.933725658869267e-05, "epoch": 3.3333333333333335, "percentage": 16.67, "elapsed_time": "0:01:08", "remaining_time": "0:05:40", "throughput": 1382.03, "total_tokens": 94080}
|
| 64 |
{"current_steps": 305, "total_steps": 1800, "loss": 0.0003, "lr": 4.9280668057279014e-05, "epoch": 3.388888888888889, "percentage": 16.94, "elapsed_time": "0:01:09", "remaining_time": "0:05:38", "throughput": 1385.12, "total_tokens": 95680}
|
| 65 |
{"current_steps": 310, "total_steps": 1800, "loss": 0.0001, "lr": 4.9221796730647516e-05, "epoch": 3.4444444444444446, "percentage": 17.22, "elapsed_time": "0:01:10", "remaining_time": "0:05:36", "throughput": 1387.7, "total_tokens": 97248}
|
| 66 |
+
{"current_steps": 315, "total_steps": 1800, "loss": 0.0001, "lr": 4.916064814370287e-05, "epoch": 3.5, "percentage": 17.5, "elapsed_time": "0:01:11", "remaining_time": "0:05:35", "throughput": 1389.73, "total_tokens": 98784}
|
| 67 |
+
{"current_steps": 320, "total_steps": 1800, "loss": 0.0, "lr": 4.9097228045450864e-05, "epoch": 3.5555555555555554, "percentage": 17.78, "elapsed_time": "0:01:12", "remaining_time": "0:05:33", "throughput": 1392.54, "total_tokens": 100384}
|
| 68 |
+
{"current_steps": 325, "total_steps": 1800, "loss": 0.0001, "lr": 4.9031542398457974e-05, "epoch": 3.611111111111111, "percentage": 18.06, "elapsed_time": "0:01:13", "remaining_time": "0:05:31", "throughput": 1394.91, "total_tokens": 101952}
|
| 69 |
+
{"current_steps": 330, "total_steps": 1800, "loss": 0.0, "lr": 4.896359737829071e-05, "epoch": 3.6666666666666665, "percentage": 18.33, "elapsed_time": "0:01:14", "remaining_time": "0:05:30", "throughput": 1397.1, "total_tokens": 103520}
|
| 70 |
+
{"current_steps": 335, "total_steps": 1800, "loss": 0.0, "lr": 4.889339937293508e-05, "epoch": 3.7222222222222223, "percentage": 18.61, "elapsed_time": "0:01:15", "remaining_time": "0:05:28", "throughput": 1400.03, "total_tokens": 105120}
|
| 71 |
+
{"current_steps": 340, "total_steps": 1800, "loss": 0.0, "lr": 4.8820954982195905e-05, "epoch": 3.7777777777777777, "percentage": 18.89, "elapsed_time": "0:01:16", "remaining_time": "0:05:26", "throughput": 1402.74, "total_tokens": 106720}
|
| 72 |
+
{"current_steps": 345, "total_steps": 1800, "loss": 0.003, "lr": 4.874627101707644e-05, "epoch": 3.8333333333333335, "percentage": 19.17, "elapsed_time": "0:01:17", "remaining_time": "0:05:25", "throughput": 1405.44, "total_tokens": 108320}
|
| 73 |
+
{"current_steps": 350, "total_steps": 1800, "loss": 0.0001, "lr": 4.8669354499137955e-05, "epoch": 3.888888888888889, "percentage": 19.44, "elapsed_time": "0:01:18", "remaining_time": "0:05:23", "throughput": 1407.59, "total_tokens": 109888}
|
| 74 |
+
{"current_steps": 355, "total_steps": 1800, "loss": 0.0001, "lr": 4.859021265983959e-05, "epoch": 3.9444444444444446, "percentage": 19.72, "elapsed_time": "0:01:19", "remaining_time": "0:05:21", "throughput": 1409.43, "total_tokens": 111424}
|
| 75 |
+
{"current_steps": 360, "total_steps": 1800, "loss": 0.0004, "lr": 4.850885293985853e-05, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:01:20", "remaining_time": "0:05:20", "throughput": 1411.44, "total_tokens": 113024}
|
| 76 |
+
{"current_steps": 360, "total_steps": 1800, "eval_loss": 0.13314101099967957, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:01:20", "remaining_time": "0:05:22", "throughput": 1400.76, "total_tokens": 113024}
|
| 77 |
+
{"current_steps": 365, "total_steps": 1800, "loss": 0.0001, "lr": 4.8425282988390376e-05, "epoch": 4.055555555555555, "percentage": 20.28, "elapsed_time": "0:01:23", "remaining_time": "0:05:26", "throughput": 1380.53, "total_tokens": 114624}
|
| 78 |
+
{"current_steps": 370, "total_steps": 1800, "loss": 0.0, "lr": 4.8339510662430046e-05, "epoch": 4.111111111111111, "percentage": 20.56, "elapsed_time": "0:01:24", "remaining_time": "0:05:24", "throughput": 1383.23, "total_tokens": 116224}
|
| 79 |
+
{"current_steps": 375, "total_steps": 1800, "loss": 0.0002, "lr": 4.825154402603308e-05, "epoch": 4.166666666666667, "percentage": 20.83, "elapsed_time": "0:01:25", "remaining_time": "0:05:23", "throughput": 1385.17, "total_tokens": 117760}
|
| 80 |
+
{"current_steps": 380, "total_steps": 1800, "loss": 0.0002, "lr": 4.816139134955746e-05, "epoch": 4.222222222222222, "percentage": 21.11, "elapsed_time": "0:01:26", "remaining_time": "0:05:21", "throughput": 1387.61, "total_tokens": 119360}
|
| 81 |
+
{"current_steps": 385, "total_steps": 1800, "loss": 0.0, "lr": 4.806906110888606e-05, "epoch": 4.277777777777778, "percentage": 21.39, "elapsed_time": "0:01:27", "remaining_time": "0:05:21", "throughput": 1384.23, "total_tokens": 120960}
|
| 82 |
+
{"current_steps": 390, "total_steps": 1800, "loss": 0.0, "lr": 4.797456198462979e-05, "epoch": 4.333333333333333, "percentage": 21.67, "elapsed_time": "0:01:28", "remaining_time": "0:05:19", "throughput": 1386.31, "total_tokens": 122528}
|
| 83 |
+
{"current_steps": 395, "total_steps": 1800, "loss": 0.0, "lr": 4.7877902861311446e-05, "epoch": 4.388888888888889, "percentage": 21.94, "elapsed_time": "0:01:29", "remaining_time": "0:05:17", "throughput": 1388.37, "total_tokens": 124096}
|
| 84 |
+
{"current_steps": 400, "total_steps": 1800, "loss": 0.0, "lr": 4.777909282653042e-05, "epoch": 4.444444444444445, "percentage": 22.22, "elapsed_time": "0:01:30", "remaining_time": "0:05:16", "throughput": 1390.79, "total_tokens": 125696}
|
| 85 |
+
{"current_steps": 405, "total_steps": 1800, "loss": 0.0, "lr": 4.7678141170108345e-05, "epoch": 4.5, "percentage": 22.5, "elapsed_time": "0:01:31", "remaining_time": "0:05:14", "throughput": 1392.89, "total_tokens": 127264}
|