Training in progress, step 12000, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1261,6 +1261,10 @@ You can finetune this model on your own dataset.
|
|
| 1261 |
| 0.2067 | 11700 | 0.4562 |
|
| 1262 |
| 0.2076 | 11750 | 0.3946 |
|
| 1263 |
| 0.2085 | 11800 | 0.4075 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1264 |
|
| 1265 |
|
| 1266 |
### Framework Versions
|
|
|
|
| 1261 |
| 0.2067 | 11700 | 0.4562 |
|
| 1262 |
| 0.2076 | 11750 | 0.3946 |
|
| 1263 |
| 0.2085 | 11800 | 0.4075 |
|
| 1264 |
+
| 0.2094 | 11850 | 0.4304 |
|
| 1265 |
+
| 0.2103 | 11900 | 0.3404 |
|
| 1266 |
+
| 0.2112 | 11950 | 0.4013 |
|
| 1267 |
+
| 0.2120 | 12000 | 0.4278 |
|
| 1268 |
|
| 1269 |
|
| 1270 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70ee7d65436b3c3bdb265a584cb598d21c1152436f7bbe5e5b89abe72d05abfd
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eacbc7b18200a700f9e868dccad3bd885c8a77978e6ef6450cc02303852cb1f4
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d3ba6287442404e0fa92289c3fc1da4f9e94c42e35bf2cb41249702ce19aede
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08f76080c54c5059b8f1d78797380cba29afea4aa1d320a9c29f8474a65aad22
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b9a38922a27408f3b8fdb8e3c0104dbbbeb511c2171a5f4ccac9abbb96798cd4
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1660,6 +1660,34 @@
|
|
| 1660 |
"learning_rate": 4.3974200388761265e-05,
|
| 1661 |
"loss": 0.4075,
|
| 1662 |
"step": 11800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1663 |
}
|
| 1664 |
],
|
| 1665 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.21204785213196445,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 12000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1660 |
"learning_rate": 4.3974200388761265e-05,
|
| 1661 |
"loss": 0.4075,
|
| 1662 |
"step": 11800
|
| 1663 |
+
},
|
| 1664 |
+
{
|
| 1665 |
+
"epoch": 0.2093972539803149,
|
| 1666 |
+
"grad_norm": 2.601897716522217,
|
| 1667 |
+
"learning_rate": 4.392511437042273e-05,
|
| 1668 |
+
"loss": 0.4304,
|
| 1669 |
+
"step": 11850
|
| 1670 |
+
},
|
| 1671 |
+
{
|
| 1672 |
+
"epoch": 0.21028078669753142,
|
| 1673 |
+
"grad_norm": 1.58085036277771,
|
| 1674 |
+
"learning_rate": 4.3876028352084194e-05,
|
| 1675 |
+
"loss": 0.3404,
|
| 1676 |
+
"step": 11900
|
| 1677 |
+
},
|
| 1678 |
+
{
|
| 1679 |
+
"epoch": 0.21116431941474792,
|
| 1680 |
+
"grad_norm": 1.7569571733474731,
|
| 1681 |
+
"learning_rate": 4.382694233374566e-05,
|
| 1682 |
+
"loss": 0.4013,
|
| 1683 |
+
"step": 11950
|
| 1684 |
+
},
|
| 1685 |
+
{
|
| 1686 |
+
"epoch": 0.21204785213196445,
|
| 1687 |
+
"grad_norm": 1.9872467517852783,
|
| 1688 |
+
"learning_rate": 4.3777856315407124e-05,
|
| 1689 |
+
"loss": 0.4278,
|
| 1690 |
+
"step": 12000
|
| 1691 |
}
|
| 1692 |
],
|
| 1693 |
"logging_steps": 50,
|