Training in progress, step 4200, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +20 -5
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 136000488
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2ed1408e2efdd98c7b559fc6298c728700c459403fc2ddc8b7970d6883b8151
|
| 3 |
size 136000488
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 268176506
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6187567f5e91ca049754cdf7f8ffb52ba96c047d25df85f69a5200af3843250e
|
| 3 |
size 268176506
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf8501dea4d10ea756923480c061d5728383aa62af8d35b092ebfbd3d967586c
|
| 3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aadc5bf956670e67c41e56faeaa14475e61fa86563d6096803859a703c17b32f
|
| 3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:041f006de55d28fa25ff03374e6f3e91438a73a58213ec73d04709da0ca494b3
|
| 3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a48b8a88d02d9f0ea814656ef0a0d2f282fcaee5f0c798e59f256612b22adb5a
|
| 3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67170579fb4c35acb5c0b3b0c2ce7debff2589cc4217ac3a9b76066d360d24bd
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "mgh6/TCS_MLM/checkpoint-
|
| 4 |
-
"epoch": 5.
|
| 5 |
"eval_steps": 100,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -622,6 +622,21 @@
|
|
| 622 |
"eval_samples_per_second": 889.94,
|
| 623 |
"eval_steps_per_second": 3.596,
|
| 624 |
"step": 4100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 625 |
}
|
| 626 |
],
|
| 627 |
"logging_steps": 100,
|
|
@@ -650,7 +665,7 @@
|
|
| 650 |
"attributes": {}
|
| 651 |
}
|
| 652 |
},
|
| 653 |
-
"total_flos": 1.
|
| 654 |
"train_batch_size": 64,
|
| 655 |
"trial_name": null,
|
| 656 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.78049236536026,
|
| 3 |
+
"best_model_checkpoint": "mgh6/TCS_MLM/checkpoint-4200",
|
| 4 |
+
"epoch": 5.622489959839357,
|
| 5 |
"eval_steps": 100,
|
| 6 |
+
"global_step": 4200,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 622 |
"eval_samples_per_second": 889.94,
|
| 623 |
"eval_steps_per_second": 3.596,
|
| 624 |
"step": 4100
|
| 625 |
+
},
|
| 626 |
+
{
|
| 627 |
+
"epoch": 5.622489959839357,
|
| 628 |
+
"grad_norm": 0.19276629388332367,
|
| 629 |
+
"learning_rate": 0.0004377510040160643,
|
| 630 |
+
"loss": 0.7411,
|
| 631 |
+
"step": 4200
|
| 632 |
+
},
|
| 633 |
+
{
|
| 634 |
+
"epoch": 5.622489959839357,
|
| 635 |
+
"eval_loss": 0.78049236536026,
|
| 636 |
+
"eval_runtime": 6.3726,
|
| 637 |
+
"eval_samples_per_second": 893.205,
|
| 638 |
+
"eval_steps_per_second": 3.609,
|
| 639 |
+
"step": 4200
|
| 640 |
}
|
| 641 |
],
|
| 642 |
"logging_steps": 100,
|
|
|
|
| 665 |
"attributes": {}
|
| 666 |
}
|
| 667 |
},
|
| 668 |
+
"total_flos": 1.52511537217536e+17,
|
| 669 |
"train_batch_size": 64,
|
| 670 |
"trial_name": null,
|
| 671 |
"trial_params": null
|