Training in progress, step 890000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7aed45f0ab31dea98b9869760d36ab73a26078c09333a23350a1212c72042c48
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07c213d1a42d199003850d981d6ccc1a53b07b35352b3c677b2fec2729c3a474
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c77bc69e9635ccd1de21423522e341a85a08863f86590b713104bba2dbfd70bb
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1993aafd21a33893d293353cc2d3a986655d484aa3f8d8bd3ce1158082956b62
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d31a0dbc0a9de5b13d8d236df1a529fa25f2a462a9bcc23416d4f0397bad521d
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2118e2247b5fdbceb9d1ec4a69c6b9d09754ac3081a89daca9da9f417d9a57c
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ce1b40c469ef20ff3c2e73618244894f6c048059642c597ebc6bc915a80cce6
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce8dd4b4014883403bd302ffd7cbfd4827bc6596e89d4a566a074b59e6257940
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c955d0a8374d68c8c7fd0b4a59ac81688e461d16a4ab83367d33bbcd82c828c2
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5d88d03c1c6897f3e815db204f99493002f497118b104ebefc70815e22888c1
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1d5e819bb6a0b170d191713e427e3ac82a202a5b895fa2fdb4da78756f26177
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -10566,11 +10566,131 @@
|
|
| 10566 |
"learning_rate": 1.543971715158307e-05,
|
| 10567 |
"loss": 0.2863,
|
| 10568 |
"step": 880000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10569 |
}
|
| 10570 |
],
|
| 10571 |
"max_steps": 1000000,
|
| 10572 |
"num_train_epochs": 2,
|
| 10573 |
-
"total_flos":
|
| 10574 |
"trial_name": null,
|
| 10575 |
"trial_params": null
|
| 10576 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.3188750089683596,
|
| 5 |
+
"global_step": 890000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 10566 |
"learning_rate": 1.543971715158307e-05,
|
| 10567 |
"loss": 0.2863,
|
| 10568 |
"step": 880000
|
| 10569 |
+
},
|
| 10570 |
+
{
|
| 10571 |
+
"epoch": 1.3,
|
| 10572 |
+
"learning_rate": 1.539507089487205e-05,
|
| 10573 |
+
"loss": 0.2865,
|
| 10574 |
+
"step": 880500
|
| 10575 |
+
},
|
| 10576 |
+
{
|
| 10577 |
+
"epoch": 1.3,
|
| 10578 |
+
"learning_rate": 1.535060126557028e-05,
|
| 10579 |
+
"loss": 0.2861,
|
| 10580 |
+
"step": 881000
|
| 10581 |
+
},
|
| 10582 |
+
{
|
| 10583 |
+
"epoch": 1.3,
|
| 10584 |
+
"learning_rate": 1.5306308385255997e-05,
|
| 10585 |
+
"loss": 0.2859,
|
| 10586 |
+
"step": 881500
|
| 10587 |
+
},
|
| 10588 |
+
{
|
| 10589 |
+
"epoch": 1.3,
|
| 10590 |
+
"learning_rate": 1.5262192375024284e-05,
|
| 10591 |
+
"loss": 0.2856,
|
| 10592 |
+
"step": 882000
|
| 10593 |
+
},
|
| 10594 |
+
{
|
| 10595 |
+
"epoch": 1.3,
|
| 10596 |
+
"learning_rate": 1.521825335548661e-05,
|
| 10597 |
+
"loss": 0.2861,
|
| 10598 |
+
"step": 882500
|
| 10599 |
+
},
|
| 10600 |
+
{
|
| 10601 |
+
"epoch": 1.3,
|
| 10602 |
+
"learning_rate": 1.5174491446770566e-05,
|
| 10603 |
+
"loss": 0.2866,
|
| 10604 |
+
"step": 883000
|
| 10605 |
+
},
|
| 10606 |
+
{
|
| 10607 |
+
"epoch": 1.31,
|
| 10608 |
+
"learning_rate": 1.5130906768519563e-05,
|
| 10609 |
+
"loss": 0.2862,
|
| 10610 |
+
"step": 883500
|
| 10611 |
+
},
|
| 10612 |
+
{
|
| 10613 |
+
"epoch": 1.31,
|
| 10614 |
+
"learning_rate": 1.508749943989242e-05,
|
| 10615 |
+
"loss": 0.2864,
|
| 10616 |
+
"step": 884000
|
| 10617 |
+
},
|
| 10618 |
+
{
|
| 10619 |
+
"epoch": 1.31,
|
| 10620 |
+
"learning_rate": 1.5044269579563144e-05,
|
| 10621 |
+
"loss": 0.286,
|
| 10622 |
+
"step": 884500
|
| 10623 |
+
},
|
| 10624 |
+
{
|
| 10625 |
+
"epoch": 1.31,
|
| 10626 |
+
"learning_rate": 1.500121730572051e-05,
|
| 10627 |
+
"loss": 0.2859,
|
| 10628 |
+
"step": 885000
|
| 10629 |
+
},
|
| 10630 |
+
{
|
| 10631 |
+
"epoch": 1.31,
|
| 10632 |
+
"learning_rate": 1.4958342736067783e-05,
|
| 10633 |
+
"loss": 0.2856,
|
| 10634 |
+
"step": 885500
|
| 10635 |
+
},
|
| 10636 |
+
{
|
| 10637 |
+
"epoch": 1.31,
|
| 10638 |
+
"learning_rate": 1.4915645987822406e-05,
|
| 10639 |
+
"loss": 0.2858,
|
| 10640 |
+
"step": 886000
|
| 10641 |
+
},
|
| 10642 |
+
{
|
| 10643 |
+
"epoch": 1.31,
|
| 10644 |
+
"learning_rate": 1.4873127177715653e-05,
|
| 10645 |
+
"loss": 0.2847,
|
| 10646 |
+
"step": 886500
|
| 10647 |
+
},
|
| 10648 |
+
{
|
| 10649 |
+
"epoch": 1.31,
|
| 10650 |
+
"learning_rate": 1.4830786421992347e-05,
|
| 10651 |
+
"loss": 0.2863,
|
| 10652 |
+
"step": 887000
|
| 10653 |
+
},
|
| 10654 |
+
{
|
| 10655 |
+
"epoch": 1.31,
|
| 10656 |
+
"learning_rate": 1.4788623836410479e-05,
|
| 10657 |
+
"loss": 0.2857,
|
| 10658 |
+
"step": 887500
|
| 10659 |
+
},
|
| 10660 |
+
{
|
| 10661 |
+
"epoch": 1.31,
|
| 10662 |
+
"learning_rate": 1.4746639536240942e-05,
|
| 10663 |
+
"loss": 0.2856,
|
| 10664 |
+
"step": 888000
|
| 10665 |
+
},
|
| 10666 |
+
{
|
| 10667 |
+
"epoch": 1.32,
|
| 10668 |
+
"learning_rate": 1.4704833636267232e-05,
|
| 10669 |
+
"loss": 0.2858,
|
| 10670 |
+
"step": 888500
|
| 10671 |
+
},
|
| 10672 |
+
{
|
| 10673 |
+
"epoch": 1.32,
|
| 10674 |
+
"learning_rate": 1.4663206250785055e-05,
|
| 10675 |
+
"loss": 0.2854,
|
| 10676 |
+
"step": 889000
|
| 10677 |
+
},
|
| 10678 |
+
{
|
| 10679 |
+
"epoch": 1.32,
|
| 10680 |
+
"learning_rate": 1.4621757493602125e-05,
|
| 10681 |
+
"loss": 0.2857,
|
| 10682 |
+
"step": 889500
|
| 10683 |
+
},
|
| 10684 |
+
{
|
| 10685 |
+
"epoch": 1.32,
|
| 10686 |
+
"learning_rate": 1.4580487478037748e-05,
|
| 10687 |
+
"loss": 0.2854,
|
| 10688 |
+
"step": 890000
|
| 10689 |
}
|
| 10690 |
],
|
| 10691 |
"max_steps": 1000000,
|
| 10692 |
"num_train_epochs": 2,
|
| 10693 |
+
"total_flos": 6.017029328628566e+22,
|
| 10694 |
"trial_name": null,
|
| 10695 |
"trial_params": null
|
| 10696 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07c213d1a42d199003850d981d6ccc1a53b07b35352b3c677b2fec2729c3a474
|
| 3 |
size 449450757
|