Training in progress, step 4100, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +21 -6
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 136000488
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58904a09d2c64b3af1fe77f00887cba4248f9483bd0ac2677c899654f1df66ed
|
| 3 |
size 136000488
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 268176506
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d399b620d3b044063e90c4a82c58a56992e901eda2495728359da6520d58da88
|
| 3 |
size 268176506
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01228c358c1c5bd7e8d1e30fd7170ee7e7e01eaa5037c80171296afcca88fdbd
|
| 3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd593900009094dac4848a91bffea810cd27abb68e41341848d2730ca4540832
|
| 3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4e118c022d36fb38d58d0d7f0e3edb67b05d2a137df1ba7684c820d27509268
|
| 3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0c260699388e534a54ab049ec860d4b3bec4fc81bbf3db000ead4ea6f2487ca
|
| 3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09ac653d13867a7357fb216c15368c33da04027e0687353248f2f6a7723c2089
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "mgh6/TCS_MLM/checkpoint-
|
| 4 |
-
"epoch": 5.
|
| 5 |
"eval_steps": 100,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -607,6 +607,21 @@
|
|
| 607 |
"eval_samples_per_second": 893.27,
|
| 608 |
"eval_steps_per_second": 3.609,
|
| 609 |
"step": 4000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 610 |
}
|
| 611 |
],
|
| 612 |
"logging_steps": 100,
|
|
@@ -621,7 +636,7 @@
|
|
| 621 |
"early_stopping_threshold": 0.0
|
| 622 |
},
|
| 623 |
"attributes": {
|
| 624 |
-
"early_stopping_patience_counter":
|
| 625 |
}
|
| 626 |
},
|
| 627 |
"TrainerControl": {
|
|
@@ -635,7 +650,7 @@
|
|
| 635 |
"attributes": {}
|
| 636 |
}
|
| 637 |
},
|
| 638 |
-
"total_flos": 1.
|
| 639 |
"train_batch_size": 64,
|
| 640 |
"trial_name": null,
|
| 641 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.7832607626914978,
|
| 3 |
+
"best_model_checkpoint": "mgh6/TCS_MLM/checkpoint-4100",
|
| 4 |
+
"epoch": 5.4886211512717535,
|
| 5 |
"eval_steps": 100,
|
| 6 |
+
"global_step": 4100,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 607 |
"eval_samples_per_second": 893.27,
|
| 608 |
"eval_steps_per_second": 3.609,
|
| 609 |
"step": 4000
|
| 610 |
+
},
|
| 611 |
+
{
|
| 612 |
+
"epoch": 5.4886211512717535,
|
| 613 |
+
"grad_norm": 0.22185169160366058,
|
| 614 |
+
"learning_rate": 0.00045113788487282465,
|
| 615 |
+
"loss": 0.7461,
|
| 616 |
+
"step": 4100
|
| 617 |
+
},
|
| 618 |
+
{
|
| 619 |
+
"epoch": 5.4886211512717535,
|
| 620 |
+
"eval_loss": 0.7832607626914978,
|
| 621 |
+
"eval_runtime": 6.3959,
|
| 622 |
+
"eval_samples_per_second": 889.94,
|
| 623 |
+
"eval_steps_per_second": 3.596,
|
| 624 |
+
"step": 4100
|
| 625 |
}
|
| 626 |
],
|
| 627 |
"logging_steps": 100,
|
|
|
|
| 636 |
"early_stopping_threshold": 0.0
|
| 637 |
},
|
| 638 |
"attributes": {
|
| 639 |
+
"early_stopping_patience_counter": 0
|
| 640 |
}
|
| 641 |
},
|
| 642 |
"TrainerControl": {
|
|
|
|
| 650 |
"attributes": {}
|
| 651 |
}
|
| 652 |
},
|
| 653 |
+
"total_flos": 1.48880310140928e+17,
|
| 654 |
"train_batch_size": 64,
|
| 655 |
"trial_name": null,
|
| 656 |
"trial_params": null
|