Training in progress, step 900
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +20 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1074144
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a6df8c0d61b2d95b348ce7e42492b09b23f0037e132c1d50414f7c8c07d765d
|
| 3 |
size 1074144
|
trainer_log.jsonl
CHANGED
|
@@ -179,3 +179,23 @@
|
|
| 179 |
{"current_steps": 810, "total_steps": 900, "loss": 0.0019, "lr": 1.541024301445404e-06, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:02:18", "remaining_time": "0:00:15", "throughput": 1826.1, "total_tokens": 253696}
|
| 180 |
{"current_steps": 810, "total_steps": 900, "eval_loss": 0.14213652908802032, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:02:19", "remaining_time": "0:00:15", "throughput": 1818.62, "total_tokens": 253696}
|
| 181 |
{"current_steps": 815, "total_steps": 900, "loss": 0.0143, "lr": 1.3778640374027985e-06, "epoch": 9.055555555555555, "percentage": 90.56, "elapsed_time": "0:02:21", "remaining_time": "0:00:14", "throughput": 1809.23, "total_tokens": 255296}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
{"current_steps": 810, "total_steps": 900, "loss": 0.0019, "lr": 1.541024301445404e-06, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:02:18", "remaining_time": "0:00:15", "throughput": 1826.1, "total_tokens": 253696}
|
| 180 |
{"current_steps": 810, "total_steps": 900, "eval_loss": 0.14213652908802032, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:02:19", "remaining_time": "0:00:15", "throughput": 1818.62, "total_tokens": 253696}
|
| 181 |
{"current_steps": 815, "total_steps": 900, "loss": 0.0143, "lr": 1.3778640374027985e-06, "epoch": 9.055555555555555, "percentage": 90.56, "elapsed_time": "0:02:21", "remaining_time": "0:00:14", "throughput": 1809.23, "total_tokens": 255296}
|
| 182 |
+
{"current_steps": 820, "total_steps": 900, "loss": 0.0061, "lr": 1.2235870926211619e-06, "epoch": 9.11111111111111, "percentage": 91.11, "elapsed_time": "0:02:21", "remaining_time": "0:00:13", "throughput": 1811.53, "total_tokens": 256896}
|
| 183 |
+
{"current_steps": 825, "total_steps": 900, "loss": 0.0041, "lr": 1.0782514843499653e-06, "epoch": 9.166666666666666, "percentage": 91.67, "elapsed_time": "0:02:22", "remaining_time": "0:00:12", "throughput": 1813.46, "total_tokens": 258432}
|
| 184 |
+
{"current_steps": 830, "total_steps": 900, "loss": 0.0018, "lr": 9.419118673676924e-07, "epoch": 9.222222222222221, "percentage": 92.22, "elapsed_time": "0:02:23", "remaining_time": "0:00:12", "throughput": 1815.59, "total_tokens": 260000}
|
| 185 |
+
{"current_steps": 835, "total_steps": 900, "loss": 0.1567, "lr": 8.146195134284052e-07, "epoch": 9.277777777777779, "percentage": 92.78, "elapsed_time": "0:02:23", "remaining_time": "0:00:11", "throughput": 1817.73, "total_tokens": 261568}
|
| 186 |
+
{"current_steps": 840, "total_steps": 900, "loss": 0.0006, "lr": 6.964222919805391e-07, "epoch": 9.333333333333334, "percentage": 93.33, "elapsed_time": "0:02:24", "remaining_time": "0:00:10", "throughput": 1820.27, "total_tokens": 263200}
|
| 187 |
+
{"current_steps": 845, "total_steps": 900, "loss": 0.012, "lr": 5.87364652165176e-07, "epoch": 9.38888888888889, "percentage": 93.89, "elapsed_time": "0:02:25", "remaining_time": "0:00:09", "throughput": 1822.13, "total_tokens": 264736}
|
| 188 |
+
{"current_steps": 850, "total_steps": 900, "loss": 0.0011, "lr": 4.874876061005173e-07, "epoch": 9.444444444444445, "percentage": 94.44, "elapsed_time": "0:02:25", "remaining_time": "0:00:08", "throughput": 1824.19, "total_tokens": 266304}
|
| 189 |
+
{"current_steps": 855, "total_steps": 900, "loss": 0.0005, "lr": 3.9682871345891883e-07, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "0:02:26", "remaining_time": "0:00:07", "throughput": 1826.02, "total_tokens": 267840}
|
| 190 |
+
{"current_steps": 855, "total_steps": 900, "eval_loss": 0.14488616585731506, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "0:02:27", "remaining_time": "0:00:07", "throughput": 1818.8, "total_tokens": 267840}
|
| 191 |
+
{"current_steps": 860, "total_steps": 900, "loss": 0.0043, "lr": 3.1542206734221924e-07, "epoch": 9.555555555555555, "percentage": 95.56, "elapsed_time": "0:02:28", "remaining_time": "0:00:06", "throughput": 1813.37, "total_tokens": 269376}
|
| 192 |
+
{"current_steps": 865, "total_steps": 900, "loss": 0.0163, "lr": 2.4329828146074095e-07, "epoch": 9.61111111111111, "percentage": 96.11, "elapsed_time": "0:02:29", "remaining_time": "0:00:06", "throughput": 1815.44, "total_tokens": 270944}
|
| 193 |
+
{"current_steps": 870, "total_steps": 900, "loss": 0.0264, "lr": 1.8048447862070718e-07, "epoch": 9.666666666666666, "percentage": 96.67, "elapsed_time": "0:02:29", "remaining_time": "0:00:05", "throughput": 1817.04, "total_tokens": 272448}
|
| 194 |
+
{"current_steps": 875, "total_steps": 900, "loss": 0.0237, "lr": 1.2700428052447033e-07, "epoch": 9.722222222222221, "percentage": 97.22, "elapsed_time": "0:02:30", "remaining_time": "0:00:04", "throughput": 1818.81, "total_tokens": 273984}
|
| 195 |
+
{"current_steps": 880, "total_steps": 900, "loss": 0.0033, "lr": 8.28777988873486e-08, "epoch": 9.777777777777779, "percentage": 97.78, "elapsed_time": "0:02:31", "remaining_time": "0:00:03", "throughput": 1820.59, "total_tokens": 275520}
|
| 196 |
+
{"current_steps": 885, "total_steps": 900, "loss": 0.0017, "lr": 4.8121627874450625e-08, "epoch": 9.833333333333334, "percentage": 98.33, "elapsed_time": "0:02:32", "remaining_time": "0:00:02", "throughput": 1823.0, "total_tokens": 277152}
|
| 197 |
+
{"current_steps": 890, "total_steps": 900, "loss": 0.0624, "lr": 2.2748837860270267e-08, "epoch": 9.88888888888889, "percentage": 98.89, "elapsed_time": "0:02:32", "remaining_time": "0:00:01", "throughput": 1824.74, "total_tokens": 278688}
|
| 198 |
+
{"current_steps": 895, "total_steps": 900, "loss": 0.002, "lr": 6.768970513457151e-09, "epoch": 9.944444444444445, "percentage": 99.44, "elapsed_time": "0:02:33", "remaining_time": "0:00:00", "throughput": 1826.82, "total_tokens": 280256}
|
| 199 |
+
{"current_steps": 900, "total_steps": 900, "loss": 0.0235, "lr": 1.8803520859811406e-10, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:02:34", "remaining_time": "0:00:00", "throughput": 1828.25, "total_tokens": 281856}
|
| 200 |
+
{"current_steps": 900, "total_steps": 900, "eval_loss": 0.1471279412508011, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:02:34", "remaining_time": "0:00:00", "throughput": 1821.48, "total_tokens": 281856}
|
| 201 |
+
{"current_steps": 900, "total_steps": 900, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:02:35", "remaining_time": "0:00:00", "throughput": 1812.53, "total_tokens": 281856}
|