Training in progress, step 46000, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1209,6 +1209,10 @@ You can finetune this model on your own dataset.
|
|
| 1209 |
| 0.8075 | 45700 | 0.3975 |
|
| 1210 |
| 0.8084 | 45750 | 0.391 |
|
| 1211 |
| 0.8093 | 45800 | 0.3055 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1212 |
|
| 1213 |
|
| 1214 |
### Framework Versions
|
|
|
|
| 1209 |
| 0.8075 | 45700 | 0.3975 |
|
| 1210 |
| 0.8084 | 45750 | 0.391 |
|
| 1211 |
| 0.8093 | 45800 | 0.3055 |
|
| 1212 |
+
| 0.8102 | 45850 | 0.2434 |
|
| 1213 |
+
| 0.8111 | 45900 | 0.285 |
|
| 1214 |
+
| 0.8120 | 45950 | 0.3952 |
|
| 1215 |
+
| 0.8129 | 46000 | 0.2802 |
|
| 1216 |
|
| 1217 |
|
| 1218 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fbc453c795079d8ded19817daacd8fec5d095bba6393f23049cff77eeb1abaf9
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:303c03bcbf6290c5fb7fcf2c4927d56af5a210c091c12a9beba5ee6c9213f174
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:431f36421847b278b660e6526fb15af6b02fbddb625572cac02bb7ad994d2dda
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89e42fd89832885ab30327a4c2371265408743318772a17d94083f8bfb054483
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e4d32ff62b246761d94af48903691044aacc80825efb2f5658f28d83287222c
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6420,6 +6420,34 @@
|
|
| 6420 |
"learning_rate": 1.061141544442481e-05,
|
| 6421 |
"loss": 0.3055,
|
| 6422 |
"step": 45800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6423 |
}
|
| 6424 |
],
|
| 6425 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.8128500998391971,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 46000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6420 |
"learning_rate": 1.061141544442481e-05,
|
| 6421 |
"loss": 0.3055,
|
| 6422 |
"step": 45800
|
| 6423 |
+
},
|
| 6424 |
+
{
|
| 6425 |
+
"epoch": 0.8101995016875475,
|
| 6426 |
+
"grad_norm": 1.3673596382141113,
|
| 6427 |
+
"learning_rate": 1.0562329426086275e-05,
|
| 6428 |
+
"loss": 0.2434,
|
| 6429 |
+
"step": 45850
|
| 6430 |
+
},
|
| 6431 |
+
{
|
| 6432 |
+
"epoch": 0.811083034404764,
|
| 6433 |
+
"grad_norm": 2.5049281120300293,
|
| 6434 |
+
"learning_rate": 1.0513243407747738e-05,
|
| 6435 |
+
"loss": 0.285,
|
| 6436 |
+
"step": 45900
|
| 6437 |
+
},
|
| 6438 |
+
{
|
| 6439 |
+
"epoch": 0.8119665671219806,
|
| 6440 |
+
"grad_norm": 4.577225208282471,
|
| 6441 |
+
"learning_rate": 1.04641573894092e-05,
|
| 6442 |
+
"loss": 0.3952,
|
| 6443 |
+
"step": 45950
|
| 6444 |
+
},
|
| 6445 |
+
{
|
| 6446 |
+
"epoch": 0.8128500998391971,
|
| 6447 |
+
"grad_norm": 1.4778873920440674,
|
| 6448 |
+
"learning_rate": 1.0415071371070664e-05,
|
| 6449 |
+
"loss": 0.2802,
|
| 6450 |
+
"step": 46000
|
| 6451 |
}
|
| 6452 |
],
|
| 6453 |
"logging_steps": 50,
|