Training in progress, step 24000, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1503,6 +1503,10 @@ You can finetune this model on your own dataset.
|
|
| 1503 |
| 0.4188 | 23700 | 0.2905 |
|
| 1504 |
| 0.4197 | 23750 | 0.3234 |
|
| 1505 |
| 0.4206 | 23800 | 0.3063 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1506 |
|
| 1507 |
</details>
|
| 1508 |
|
|
|
|
| 1503 |
| 0.4188 | 23700 | 0.2905 |
|
| 1504 |
| 0.4197 | 23750 | 0.3234 |
|
| 1505 |
| 0.4206 | 23800 | 0.3063 |
|
| 1506 |
+
| 0.4214 | 23850 | 0.3386 |
|
| 1507 |
+
| 0.4223 | 23900 | 0.3431 |
|
| 1508 |
+
| 0.4232 | 23950 | 0.2902 |
|
| 1509 |
+
| 0.4241 | 24000 | 0.3136 |
|
| 1510 |
|
| 1511 |
</details>
|
| 1512 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f90e824e1a9ffb638cde5da629726eeb7bc12cd6362fbb12d7d04ce5372c8f5
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9a99a7427fcc196b3d93d1ed84e8a7ea1d28d3ea047d1257af87399a72693de
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66083f07808429ecf1c592a46d5e2fa5fd55c02b1303e07c74cebcc8fc44243c
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:367e0d39f4bf5ffc122ff5c01987f9ae3492dd527c06b1748807449df9f2581e
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33058f7a86733565c00edc7dc90e657260ca9633bceae0708704d9688e1a076c
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3340,6 +3340,34 @@
|
|
| 3340 |
"learning_rate": 3.2199446309713146e-05,
|
| 3341 |
"loss": 0.3063,
|
| 3342 |
"step": 23800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3343 |
}
|
| 3344 |
],
|
| 3345 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.4240957042639289,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 24000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3340 |
"learning_rate": 3.2199446309713146e-05,
|
| 3341 |
"loss": 0.3063,
|
| 3342 |
"step": 23800
|
| 3343 |
+
},
|
| 3344 |
+
{
|
| 3345 |
+
"epoch": 0.4214451061122793,
|
| 3346 |
+
"grad_norm": 1.289444923400879,
|
| 3347 |
+
"learning_rate": 3.2150360291374604e-05,
|
| 3348 |
+
"loss": 0.3386,
|
| 3349 |
+
"step": 23850
|
| 3350 |
+
},
|
| 3351 |
+
{
|
| 3352 |
+
"epoch": 0.42232863882949584,
|
| 3353 |
+
"grad_norm": 2.95922589302063,
|
| 3354 |
+
"learning_rate": 3.210127427303607e-05,
|
| 3355 |
+
"loss": 0.3431,
|
| 3356 |
+
"step": 23900
|
| 3357 |
+
},
|
| 3358 |
+
{
|
| 3359 |
+
"epoch": 0.42321217154671237,
|
| 3360 |
+
"grad_norm": 1.6753530502319336,
|
| 3361 |
+
"learning_rate": 3.2052188254697534e-05,
|
| 3362 |
+
"loss": 0.2902,
|
| 3363 |
+
"step": 23950
|
| 3364 |
+
},
|
| 3365 |
+
{
|
| 3366 |
+
"epoch": 0.4240957042639289,
|
| 3367 |
+
"grad_norm": 1.6901003122329712,
|
| 3368 |
+
"learning_rate": 3.2003102236359e-05,
|
| 3369 |
+
"loss": 0.3136,
|
| 3370 |
+
"step": 24000
|
| 3371 |
}
|
| 3372 |
],
|
| 3373 |
"logging_steps": 50,
|