Training in progress, step 10200, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1225,6 +1225,10 @@ You can finetune this model on your own dataset.
|
|
| 1225 |
| 0.1749 | 9900 | 0.4193 |
|
| 1226 |
| 0.1758 | 9950 | 0.3173 |
|
| 1227 |
| 0.1767 | 10000 | 0.4569 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1228 |
|
| 1229 |
|
| 1230 |
### Framework Versions
|
|
|
|
| 1225 |
| 0.1749 | 9900 | 0.4193 |
|
| 1226 |
| 0.1758 | 9950 | 0.3173 |
|
| 1227 |
| 0.1767 | 10000 | 0.4569 |
|
| 1228 |
+
| 0.1776 | 10050 | 0.4538 |
|
| 1229 |
+
| 0.1785 | 10100 | 0.4422 |
|
| 1230 |
+
| 0.1794 | 10150 | 0.3747 |
|
| 1231 |
+
| 0.1802 | 10200 | 0.3989 |
|
| 1232 |
|
| 1233 |
|
| 1234 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b1113f1dc63b4ef74f5c024aa4257a74f5c601162a5392123b472bd440c772d
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:372cd21ba6640c7fb2d1ef5b71e6d5270fa8bff460e3c646226db5c68492b951
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11b6cb074bbe2129c4a92512f6b7604d9e93435cd6ffac4a406363aba2e66f67
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8373615cf09b792af1b9fd441a341b87607a411c0918e8ae083ffab9de6dcc10
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58c8231fde4f806f412947f64ac786f123198ace358add629069cb5de99c1e42
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1408,6 +1408,34 @@
|
|
| 1408 |
"learning_rate": 4.574031532858181e-05,
|
| 1409 |
"loss": 0.4569,
|
| 1410 |
"step": 10000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1411 |
}
|
| 1412 |
],
|
| 1413 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.18024067431216978,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 10200,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1408 |
"learning_rate": 4.574031532858181e-05,
|
| 1409 |
"loss": 0.4569,
|
| 1410 |
"step": 10000
|
| 1411 |
+
},
|
| 1412 |
+
{
|
| 1413 |
+
"epoch": 0.17759007616052022,
|
| 1414 |
+
"grad_norm": 1.6226812601089478,
|
| 1415 |
+
"learning_rate": 4.569122931024327e-05,
|
| 1416 |
+
"loss": 0.4538,
|
| 1417 |
+
"step": 10050
|
| 1418 |
+
},
|
| 1419 |
+
{
|
| 1420 |
+
"epoch": 0.17847360887773675,
|
| 1421 |
+
"grad_norm": 1.9845385551452637,
|
| 1422 |
+
"learning_rate": 4.564214329190474e-05,
|
| 1423 |
+
"loss": 0.4422,
|
| 1424 |
+
"step": 10100
|
| 1425 |
+
},
|
| 1426 |
+
{
|
| 1427 |
+
"epoch": 0.17935714159495325,
|
| 1428 |
+
"grad_norm": 1.7016047239303589,
|
| 1429 |
+
"learning_rate": 4.5593057273566195e-05,
|
| 1430 |
+
"loss": 0.3747,
|
| 1431 |
+
"step": 10150
|
| 1432 |
+
},
|
| 1433 |
+
{
|
| 1434 |
+
"epoch": 0.18024067431216978,
|
| 1435 |
+
"grad_norm": 2.2167670726776123,
|
| 1436 |
+
"learning_rate": 4.5543971255227666e-05,
|
| 1437 |
+
"loss": 0.3989,
|
| 1438 |
+
"step": 10200
|
| 1439 |
}
|
| 1440 |
],
|
| 1441 |
"logging_steps": 50,
|