Training in progress, step 13400, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1187,6 +1187,8 @@ You can finetune this model on your own dataset.
|
|
| 1187 |
</details>
|
| 1188 |
|
| 1189 |
### Training Logs
|
|
|
|
|
|
|
| 1190 |
| Epoch | Step | Training Loss |
|
| 1191 |
|:------:|:-----:|:-------------:|
|
| 1192 |
| 0.1458 | 8250 | 0.4688 |
|
|
@@ -1289,7 +1291,12 @@ You can finetune this model on your own dataset.
|
|
| 1289 |
| 0.2315 | 13100 | 0.4359 |
|
| 1290 |
| 0.2324 | 13150 | 0.3702 |
|
| 1291 |
| 0.2333 | 13200 | 0.5026 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1292 |
|
|
|
|
| 1293 |
|
| 1294 |
### Framework Versions
|
| 1295 |
- Python: 3.11.13
|
|
|
|
| 1187 |
</details>
|
| 1188 |
|
| 1189 |
### Training Logs
|
| 1190 |
+
<details><summary>Click to expand</summary>
|
| 1191 |
+
|
| 1192 |
| Epoch | Step | Training Loss |
|
| 1193 |
|:------:|:-----:|:-------------:|
|
| 1194 |
| 0.1458 | 8250 | 0.4688 |
|
|
|
|
| 1291 |
| 0.2315 | 13100 | 0.4359 |
|
| 1292 |
| 0.2324 | 13150 | 0.3702 |
|
| 1293 |
| 0.2333 | 13200 | 0.5026 |
|
| 1294 |
+
| 0.2341 | 13250 | 0.5201 |
|
| 1295 |
+
| 0.2350 | 13300 | 0.3857 |
|
| 1296 |
+
| 0.2359 | 13350 | 0.3555 |
|
| 1297 |
+
| 0.2368 | 13400 | 0.381 |
|
| 1298 |
|
| 1299 |
+
</details>
|
| 1300 |
|
| 1301 |
### Framework Versions
|
| 1302 |
- Python: 3.11.13
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1ff219d2dc182bff9938951ac2d922c87f8b3382fe905eda8bb33e8c98eb346
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cbe0f24feef012ade435d0955f499b908ca4147d4a231048c8182b66d61b6f43
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c735c57cb6abf0ceb0d281a7f9e69dc9a61b723c825d122108ee089e4f92d40
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:578b0f8cd0a36e27a4c0005bc9769962acef123517226f60d74acf957afcc72c
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c54be013f521e475446bbdac4929f2f53d5e8b1200009f671ab53144a213230f
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1856,6 +1856,34 @@
|
|
| 1856 |
"learning_rate": 4.260077359564902e-05,
|
| 1857 |
"loss": 0.5026,
|
| 1858 |
"step": 13200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1859 |
}
|
| 1860 |
],
|
| 1861 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.23678676821402697,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 13400,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1856 |
"learning_rate": 4.260077359564902e-05,
|
| 1857 |
"loss": 0.5026,
|
| 1858 |
"step": 13200
|
| 1859 |
+
},
|
| 1860 |
+
{
|
| 1861 |
+
"epoch": 0.2341361700623774,
|
| 1862 |
+
"grad_norm": 1.818076252937317,
|
| 1863 |
+
"learning_rate": 4.2551687577310476e-05,
|
| 1864 |
+
"loss": 0.5201,
|
| 1865 |
+
"step": 13250
|
| 1866 |
+
},
|
| 1867 |
+
{
|
| 1868 |
+
"epoch": 0.23501970277959394,
|
| 1869 |
+
"grad_norm": 1.9688682556152344,
|
| 1870 |
+
"learning_rate": 4.250260155897194e-05,
|
| 1871 |
+
"loss": 0.3857,
|
| 1872 |
+
"step": 13300
|
| 1873 |
+
},
|
| 1874 |
+
{
|
| 1875 |
+
"epoch": 0.23590323549681044,
|
| 1876 |
+
"grad_norm": 2.4908297061920166,
|
| 1877 |
+
"learning_rate": 4.245351554063341e-05,
|
| 1878 |
+
"loss": 0.3555,
|
| 1879 |
+
"step": 13350
|
| 1880 |
+
},
|
| 1881 |
+
{
|
| 1882 |
+
"epoch": 0.23678676821402697,
|
| 1883 |
+
"grad_norm": 1.9015276432037354,
|
| 1884 |
+
"learning_rate": 4.240442952229487e-05,
|
| 1885 |
+
"loss": 0.381,
|
| 1886 |
+
"step": 13400
|
| 1887 |
}
|
| 1888 |
],
|
| 1889 |
"logging_steps": 50,
|