Training in progress, step 20000, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1423,6 +1423,10 @@ You can finetune this model on your own dataset.
|
|
| 1423 |
| 0.3481 | 19700 | 0.3363 |
|
| 1424 |
| 0.3490 | 19750 | 0.3484 |
|
| 1425 |
| 0.3499 | 19800 | 0.3719 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1426 |
|
| 1427 |
</details>
|
| 1428 |
|
|
|
|
| 1423 |
| 0.3481 | 19700 | 0.3363 |
|
| 1424 |
| 0.3490 | 19750 | 0.3484 |
|
| 1425 |
| 0.3499 | 19800 | 0.3719 |
|
| 1426 |
+
| 0.3508 | 19850 | 0.3226 |
|
| 1427 |
+
| 0.3516 | 19900 | 0.3636 |
|
| 1428 |
+
| 0.3525 | 19950 | 0.3704 |
|
| 1429 |
+
| 0.3534 | 20000 | 0.3459 |
|
| 1430 |
|
| 1431 |
</details>
|
| 1432 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7072d59f5aacf209413bfe1b68393d31e61f586cffca0291db2c96c4018ed70
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ace23ee1b367e732fb2aa3f1a05392e0e438a9535dad79c057fb1b8e02b9d68
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14dd25fc849fe74831fabbde14139b26a0f4aad6b8e087440c778d9e11ab8d7e
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:313073d5ed3020eae2b8815f959d08ffdd33605f3a4a238421bfb13a6954c251
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cde16ded14375bc47c997c2d556dc8340b61e53a3892d07fbf99a72d763ffa8d
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2780,6 +2780,34 @@
|
|
| 2780 |
"learning_rate": 3.6124364336062515e-05,
|
| 2781 |
"loss": 0.3719,
|
| 2782 |
"step": 19800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2783 |
}
|
| 2784 |
],
|
| 2785 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.3534130868866074,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 20000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2780 |
"learning_rate": 3.6124364336062515e-05,
|
| 2781 |
"loss": 0.3719,
|
| 2782 |
"step": 19800
|
| 2783 |
+
},
|
| 2784 |
+
{
|
| 2785 |
+
"epoch": 0.35076248873495786,
|
| 2786 |
+
"grad_norm": 4.908963680267334,
|
| 2787 |
+
"learning_rate": 3.607527831772398e-05,
|
| 2788 |
+
"loss": 0.3226,
|
| 2789 |
+
"step": 19850
|
| 2790 |
+
},
|
| 2791 |
+
{
|
| 2792 |
+
"epoch": 0.3516460214521744,
|
| 2793 |
+
"grad_norm": 1.5221627950668335,
|
| 2794 |
+
"learning_rate": 3.6026192299385444e-05,
|
| 2795 |
+
"loss": 0.3636,
|
| 2796 |
+
"step": 19900
|
| 2797 |
+
},
|
| 2798 |
+
{
|
| 2799 |
+
"epoch": 0.3525295541693909,
|
| 2800 |
+
"grad_norm": 1.8089814186096191,
|
| 2801 |
+
"learning_rate": 3.597710628104691e-05,
|
| 2802 |
+
"loss": 0.3704,
|
| 2803 |
+
"step": 19950
|
| 2804 |
+
},
|
| 2805 |
+
{
|
| 2806 |
+
"epoch": 0.3534130868866074,
|
| 2807 |
+
"grad_norm": 2.786560535430908,
|
| 2808 |
+
"learning_rate": 3.5928020262708373e-05,
|
| 2809 |
+
"loss": 0.3459,
|
| 2810 |
+
"step": 20000
|
| 2811 |
}
|
| 2812 |
],
|
| 2813 |
"logging_steps": 50,
|