Training in progress, step 12800, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1277,6 +1277,10 @@ You can finetune this model on your own dataset.
|
|
| 1277 |
| 0.2209 | 12500 | 0.3775 |
|
| 1278 |
| 0.2218 | 12550 | 0.3695 |
|
| 1279 |
| 0.2227 | 12600 | 0.3545 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1280 |
|
| 1281 |
|
| 1282 |
### Framework Versions
|
|
|
|
| 1277 |
| 0.2209 | 12500 | 0.3775 |
|
| 1278 |
| 0.2218 | 12550 | 0.3695 |
|
| 1279 |
| 0.2227 | 12600 | 0.3545 |
|
| 1280 |
+
| 0.2235 | 12650 | 0.3548 |
|
| 1281 |
+
| 0.2244 | 12700 | 0.4847 |
|
| 1282 |
+
| 0.2253 | 12750 | 0.4 |
|
| 1283 |
+
| 0.2262 | 12800 | 0.4755 |
|
| 1284 |
|
| 1285 |
|
| 1286 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aaffa127a39fd901b451a02082dd73e610a44a04250335e841bd1c868ed76e7e
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3099f71f55b4d64c3a23404f7faf834041dd15d4ee0a70f3ac8b4ad68054749c
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17530db209b84526ccd898abb8b9457fb6227f11278068f45b5d61574f612471
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ad444446e22ff06cf0b10a8954472ffaf9e2730e13a164edba0cc9cb5b081cd
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef5f6a8cf979ba7b777c43842eed460a7fa788b746409fc99d6b1fe7576d8044
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1772,6 +1772,34 @@
|
|
| 1772 |
"learning_rate": 4.318882409534468e-05,
|
| 1773 |
"loss": 0.3545,
|
| 1774 |
"step": 12600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1775 |
}
|
| 1776 |
],
|
| 1777 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.22618437560742874,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 12800,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1772 |
"learning_rate": 4.318882409534468e-05,
|
| 1773 |
"loss": 0.3545,
|
| 1774 |
"step": 12600
|
| 1775 |
+
},
|
| 1776 |
+
{
|
| 1777 |
+
"epoch": 0.22353377745577918,
|
| 1778 |
+
"grad_norm": 1.8885284662246704,
|
| 1779 |
+
"learning_rate": 4.313973807700615e-05,
|
| 1780 |
+
"loss": 0.3548,
|
| 1781 |
+
"step": 12650
|
| 1782 |
+
},
|
| 1783 |
+
{
|
| 1784 |
+
"epoch": 0.2244173101729957,
|
| 1785 |
+
"grad_norm": 1.8508330583572388,
|
| 1786 |
+
"learning_rate": 4.3090652058667615e-05,
|
| 1787 |
+
"loss": 0.4847,
|
| 1788 |
+
"step": 12700
|
| 1789 |
+
},
|
| 1790 |
+
{
|
| 1791 |
+
"epoch": 0.22530084289021224,
|
| 1792 |
+
"grad_norm": 2.1445882320404053,
|
| 1793 |
+
"learning_rate": 4.304156604032907e-05,
|
| 1794 |
+
"loss": 0.4,
|
| 1795 |
+
"step": 12750
|
| 1796 |
+
},
|
| 1797 |
+
{
|
| 1798 |
+
"epoch": 0.22618437560742874,
|
| 1799 |
+
"grad_norm": 1.721024990081787,
|
| 1800 |
+
"learning_rate": 4.299248002199054e-05,
|
| 1801 |
+
"loss": 0.4755,
|
| 1802 |
+
"step": 12800
|
| 1803 |
}
|
| 1804 |
],
|
| 1805 |
"logging_steps": 50,
|