Training in progress, step 18600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1395,6 +1395,10 @@ You can finetune this model on your own dataset.
|
|
| 1395 |
| 0.3234 | 18300 | 0.31 |
|
| 1396 |
| 0.3243 | 18350 | 0.306 |
|
| 1397 |
| 0.3251 | 18400 | 0.3426 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1398 |
|
| 1399 |
</details>
|
| 1400 |
|
|
|
|
| 1395 |
| 0.3234 | 18300 | 0.31 |
|
| 1396 |
| 0.3243 | 18350 | 0.306 |
|
| 1397 |
| 0.3251 | 18400 | 0.3426 |
|
| 1398 |
+
| 0.3260 | 18450 | 0.2807 |
|
| 1399 |
+
| 0.3269 | 18500 | 0.3856 |
|
| 1400 |
+
| 0.3278 | 18550 | 0.3575 |
|
| 1401 |
+
| 0.3287 | 18600 | 0.347 |
|
| 1402 |
|
| 1403 |
</details>
|
| 1404 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c659f7470485136b016f37853007297ca9974233845b53c7106a151f1185c5ff
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5134fd47d1ddef6b8fafbfb0e5b5ac6fce8d4885f9aa6d68d2550fe5fe73399a
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8876eb9659d218313ffcb704a4ab5032bff1b5e2e75c2dbc7a464331691d0ba1
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:346a0b991711eadeb589f86e15866b208715d0ef237bad5b888484a4e3892901
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d4a93a47d6ad046cc4b29ddb3b4c48d6d603705a4f414700fa29b5fa5270c50
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2584,6 +2584,34 @@
|
|
| 2584 |
"learning_rate": 3.749779112917477e-05,
|
| 2585 |
"loss": 0.3426,
|
| 2586 |
"step": 18400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2587 |
}
|
| 2588 |
],
|
| 2589 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.3286741708045449,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 18600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2584 |
"learning_rate": 3.749779112917477e-05,
|
| 2585 |
"loss": 0.3426,
|
| 2586 |
"step": 18400
|
| 2587 |
+
},
|
| 2588 |
+
{
|
| 2589 |
+
"epoch": 0.32602357265289533,
|
| 2590 |
+
"grad_norm": 3.006504535675049,
|
| 2591 |
+
"learning_rate": 3.744870511083623e-05,
|
| 2592 |
+
"loss": 0.2807,
|
| 2593 |
+
"step": 18450
|
| 2594 |
+
},
|
| 2595 |
+
{
|
| 2596 |
+
"epoch": 0.32690710537011186,
|
| 2597 |
+
"grad_norm": 1.5666753053665161,
|
| 2598 |
+
"learning_rate": 3.73996190924977e-05,
|
| 2599 |
+
"loss": 0.3856,
|
| 2600 |
+
"step": 18500
|
| 2601 |
+
},
|
| 2602 |
+
{
|
| 2603 |
+
"epoch": 0.3277906380873284,
|
| 2604 |
+
"grad_norm": 1.9692752361297607,
|
| 2605 |
+
"learning_rate": 3.735053307415916e-05,
|
| 2606 |
+
"loss": 0.3575,
|
| 2607 |
+
"step": 18550
|
| 2608 |
+
},
|
| 2609 |
+
{
|
| 2610 |
+
"epoch": 0.3286741708045449,
|
| 2611 |
+
"grad_norm": 3.517622232437134,
|
| 2612 |
+
"learning_rate": 3.730144705582062e-05,
|
| 2613 |
+
"loss": 0.347,
|
| 2614 |
+
"step": 18600
|
| 2615 |
}
|
| 2616 |
],
|
| 2617 |
"logging_steps": 50,
|