Training in progress, step 900000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81603416f1fba3109b9ef2dfed9df589999e06f1186e2baeac09426d5454ae3d
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cd0588845ddc4a31c9c47550dec41eb37349d2edd4e1f34356e1f993b492ba0
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f495dfd7f4ccee04d72bad0e351a96e4d86b89643197cb94dfee5e43013e7b2
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a3ab375a848ecaefff3706d2c692ac9259621d2aa93953ba9d5f4510dca87b7
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48bc8ea8a56da3a164d01716edb5b450bdffa75fb03183f592780d30cdcccc55
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4bef8a52198e713e41af5f2aacfd75864c1e5d1bcaad410e37c104450edb03f2
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa624c96f92eaad028a188cf78acc34c38cdc88db165dfecd04176965e65555e
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 13.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -6592,11 +6592,85 @@
|
|
| 6592 |
"eval_samples_per_second": 1364.004,
|
| 6593 |
"eval_steps_per_second": 21.824,
|
| 6594 |
"step": 890000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6595 |
}
|
| 6596 |
],
|
| 6597 |
"max_steps": 1000000,
|
| 6598 |
"num_train_epochs": 16,
|
| 6599 |
-
"total_flos": 6.
|
| 6600 |
"trial_name": null,
|
| 6601 |
"trial_params": null
|
| 6602 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 13.743185670438407,
|
| 5 |
+
"global_step": 900000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 6592 |
"eval_samples_per_second": 1364.004,
|
| 6593 |
"eval_steps_per_second": 21.824,
|
| 6594 |
"step": 890000
|
| 6595 |
+
},
|
| 6596 |
+
{
|
| 6597 |
+
"epoch": 13.61,
|
| 6598 |
+
"learning_rate": 1.4498484122598232e-05,
|
| 6599 |
+
"loss": 0.2308,
|
| 6600 |
+
"step": 891000
|
| 6601 |
+
},
|
| 6602 |
+
{
|
| 6603 |
+
"epoch": 13.62,
|
| 6604 |
+
"learning_rate": 1.4417197081242083e-05,
|
| 6605 |
+
"loss": 0.2305,
|
| 6606 |
+
"step": 892000
|
| 6607 |
+
},
|
| 6608 |
+
{
|
| 6609 |
+
"epoch": 13.64,
|
| 6610 |
+
"learning_rate": 1.433662724291136e-05,
|
| 6611 |
+
"loss": 0.2313,
|
| 6612 |
+
"step": 893000
|
| 6613 |
+
},
|
| 6614 |
+
{
|
| 6615 |
+
"epoch": 13.65,
|
| 6616 |
+
"learning_rate": 1.4256775488704904e-05,
|
| 6617 |
+
"loss": 0.2311,
|
| 6618 |
+
"step": 894000
|
| 6619 |
+
},
|
| 6620 |
+
{
|
| 6621 |
+
"epoch": 13.67,
|
| 6622 |
+
"learning_rate": 1.4177642691868717e-05,
|
| 6623 |
+
"loss": 0.231,
|
| 6624 |
+
"step": 895000
|
| 6625 |
+
},
|
| 6626 |
+
{
|
| 6627 |
+
"epoch": 13.67,
|
| 6628 |
+
"eval_runtime": 0.7236,
|
| 6629 |
+
"eval_samples_per_second": 1382.053,
|
| 6630 |
+
"eval_steps_per_second": 22.113,
|
| 6631 |
+
"step": 895000
|
| 6632 |
+
},
|
| 6633 |
+
{
|
| 6634 |
+
"epoch": 13.68,
|
| 6635 |
+
"learning_rate": 1.4099229717786368e-05,
|
| 6636 |
+
"loss": 0.231,
|
| 6637 |
+
"step": 896000
|
| 6638 |
+
},
|
| 6639 |
+
{
|
| 6640 |
+
"epoch": 13.7,
|
| 6641 |
+
"learning_rate": 1.4021537423969588e-05,
|
| 6642 |
+
"loss": 0.2317,
|
| 6643 |
+
"step": 897000
|
| 6644 |
+
},
|
| 6645 |
+
{
|
| 6646 |
+
"epoch": 13.71,
|
| 6647 |
+
"learning_rate": 1.3944566660048863e-05,
|
| 6648 |
+
"loss": 0.2308,
|
| 6649 |
+
"step": 898000
|
| 6650 |
+
},
|
| 6651 |
+
{
|
| 6652 |
+
"epoch": 13.73,
|
| 6653 |
+
"learning_rate": 1.3868318267764128e-05,
|
| 6654 |
+
"loss": 0.2309,
|
| 6655 |
+
"step": 899000
|
| 6656 |
+
},
|
| 6657 |
+
{
|
| 6658 |
+
"epoch": 13.74,
|
| 6659 |
+
"learning_rate": 1.3792793080955574e-05,
|
| 6660 |
+
"loss": 0.2308,
|
| 6661 |
+
"step": 900000
|
| 6662 |
+
},
|
| 6663 |
+
{
|
| 6664 |
+
"epoch": 13.74,
|
| 6665 |
+
"eval_runtime": 0.7542,
|
| 6666 |
+
"eval_samples_per_second": 1325.982,
|
| 6667 |
+
"eval_steps_per_second": 21.216,
|
| 6668 |
+
"step": 900000
|
| 6669 |
}
|
| 6670 |
],
|
| 6671 |
"max_steps": 1000000,
|
| 6672 |
"num_train_epochs": 16,
|
| 6673 |
+
"total_flos": 6.309014950845309e+22,
|
| 6674 |
"trial_name": null,
|
| 6675 |
"trial_params": null
|
| 6676 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cd0588845ddc4a31c9c47550dec41eb37349d2edd4e1f34356e1f993b492ba0
|
| 3 |
size 449471589
|