Training in progress, step 18200, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1387,6 +1387,10 @@ You can finetune this model on your own dataset.
|
|
| 1387 |
| 0.3163 | 17900 | 0.4349 |
|
| 1388 |
| 0.3172 | 17950 | 0.3633 |
|
| 1389 |
| 0.3181 | 18000 | 0.3431 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1390 |
|
| 1391 |
</details>
|
| 1392 |
|
|
|
|
| 1387 |
| 0.3163 | 17900 | 0.4349 |
|
| 1388 |
| 0.3172 | 17950 | 0.3633 |
|
| 1389 |
| 0.3181 | 18000 | 0.3431 |
|
| 1390 |
+
| 0.3190 | 18050 | 0.3986 |
|
| 1391 |
+
| 0.3198 | 18100 | 0.3279 |
|
| 1392 |
+
| 0.3207 | 18150 | 0.3062 |
|
| 1393 |
+
| 0.3216 | 18200 | 0.2973 |
|
| 1394 |
|
| 1395 |
</details>
|
| 1396 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c561d22f3f5062bc767250ffdb3fa4a0f7bd3dbdb65e4c11cfceaa01995c64c
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:979b402a40bd2a435e70fee699ca07b55766750d192cbf2268122098c8ea3e92
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28d8ccebbb7f7b52e625ef2554e1cd3690dd81aea7ece9b35eeb250cf32f7566
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1747e186a8b719e713dac067421a4083615d1c151147e0b1a41977c8731e3e98
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:896e9fd82393923d4baed79b5528ea09727379e27a1318af2376e0aaf0f43d15
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2528,6 +2528,34 @@
|
|
| 2528 |
"learning_rate": 3.7890479275883064e-05,
|
| 2529 |
"loss": 0.3431,
|
| 2530 |
"step": 18000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2531 |
}
|
| 2532 |
],
|
| 2533 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.32160590906681275,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 18200,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2528 |
"learning_rate": 3.7890479275883064e-05,
|
| 2529 |
"loss": 0.3431,
|
| 2530 |
"step": 18000
|
| 2531 |
+
},
|
| 2532 |
+
{
|
| 2533 |
+
"epoch": 0.3189553109151632,
|
| 2534 |
+
"grad_norm": 1.6438477039337158,
|
| 2535 |
+
"learning_rate": 3.784139325754452e-05,
|
| 2536 |
+
"loss": 0.3986,
|
| 2537 |
+
"step": 18050
|
| 2538 |
+
},
|
| 2539 |
+
{
|
| 2540 |
+
"epoch": 0.3198388436323797,
|
| 2541 |
+
"grad_norm": 1.6794339418411255,
|
| 2542 |
+
"learning_rate": 3.779230723920599e-05,
|
| 2543 |
+
"loss": 0.3279,
|
| 2544 |
+
"step": 18100
|
| 2545 |
+
},
|
| 2546 |
+
{
|
| 2547 |
+
"epoch": 0.3207223763495962,
|
| 2548 |
+
"grad_norm": 1.5067431926727295,
|
| 2549 |
+
"learning_rate": 3.7743221220867445e-05,
|
| 2550 |
+
"loss": 0.3062,
|
| 2551 |
+
"step": 18150
|
| 2552 |
+
},
|
| 2553 |
+
{
|
| 2554 |
+
"epoch": 0.32160590906681275,
|
| 2555 |
+
"grad_norm": 1.6953719854354858,
|
| 2556 |
+
"learning_rate": 3.7694135202528916e-05,
|
| 2557 |
+
"loss": 0.2973,
|
| 2558 |
+
"step": 18200
|
| 2559 |
}
|
| 2560 |
],
|
| 2561 |
"logging_steps": 50,
|