Training in progress, step 18800, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1399,6 +1399,10 @@ You can finetune this model on your own dataset.
|
|
| 1399 |
| 0.3269 | 18500 | 0.3856 |
|
| 1400 |
| 0.3278 | 18550 | 0.3575 |
|
| 1401 |
| 0.3287 | 18600 | 0.347 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1402 |
|
| 1403 |
</details>
|
| 1404 |
|
|
|
|
| 1399 |
| 0.3269 | 18500 | 0.3856 |
|
| 1400 |
| 0.3278 | 18550 | 0.3575 |
|
| 1401 |
| 0.3287 | 18600 | 0.347 |
|
| 1402 |
+
| 0.3296 | 18650 | 0.3195 |
|
| 1403 |
+
| 0.3304 | 18700 | 0.3543 |
|
| 1404 |
+
| 0.3313 | 18750 | 0.3642 |
|
| 1405 |
+
| 0.3322 | 18800 | 0.3415 |
|
| 1406 |
|
| 1407 |
</details>
|
| 1408 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6283a957a66c184eb3662df7aa262cd154fb1dcee802f75857776d94240bc13
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78292edbaee380ac230c50b2f8b68c5dd37e7df2ddfa15b2c43a017c23edfbeb
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83eb2f002d2d9da0dc4755fcda636e68d5b4a41e8c2e5a2b8c82d014a65d58ff
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c934cf056ab14aad09651705c56e19636a65538c7a4a6a0401dc0dfa92a2b37
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0b05eb17181bcecd1e03b83742ebacffcd8b0f6bb6d68e2982a23c3f445018e
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2612,6 +2612,34 @@
|
|
| 2612 |
"learning_rate": 3.730144705582062e-05,
|
| 2613 |
"loss": 0.347,
|
| 2614 |
"step": 18600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2615 |
}
|
| 2616 |
],
|
| 2617 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.332208301673411,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 18800,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2612 |
"learning_rate": 3.730144705582062e-05,
|
| 2613 |
"loss": 0.347,
|
| 2614 |
"step": 18600
|
| 2615 |
+
},
|
| 2616 |
+
{
|
| 2617 |
+
"epoch": 0.3295577035217614,
|
| 2618 |
+
"grad_norm": 1.8076531887054443,
|
| 2619 |
+
"learning_rate": 3.7252361037482085e-05,
|
| 2620 |
+
"loss": 0.3195,
|
| 2621 |
+
"step": 18650
|
| 2622 |
+
},
|
| 2623 |
+
{
|
| 2624 |
+
"epoch": 0.3304412362389779,
|
| 2625 |
+
"grad_norm": 1.8082791566848755,
|
| 2626 |
+
"learning_rate": 3.720327501914355e-05,
|
| 2627 |
+
"loss": 0.3543,
|
| 2628 |
+
"step": 18700
|
| 2629 |
+
},
|
| 2630 |
+
{
|
| 2631 |
+
"epoch": 0.33132476895619445,
|
| 2632 |
+
"grad_norm": 1.3712306022644043,
|
| 2633 |
+
"learning_rate": 3.7154189000805014e-05,
|
| 2634 |
+
"loss": 0.3642,
|
| 2635 |
+
"step": 18750
|
| 2636 |
+
},
|
| 2637 |
+
{
|
| 2638 |
+
"epoch": 0.332208301673411,
|
| 2639 |
+
"grad_norm": 1.5654476881027222,
|
| 2640 |
+
"learning_rate": 3.710510298246648e-05,
|
| 2641 |
+
"loss": 0.3415,
|
| 2642 |
+
"step": 18800
|
| 2643 |
}
|
| 2644 |
],
|
| 2645 |
"logging_steps": 50,
|