Training in progress, epoch 10
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +846 -3
- pytorch_model.bin +1 -1
- runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 236469913
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e50c16003edb8883d4149aea560899fa302da8a5ea265041b7b2b0e6e753ea6
|
| 3 |
size 236469913
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 118242180
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:020c47f09229e7b4397da1597dd814d8bad9db375a9c2b2366593d834ff17bb0
|
| 3 |
size 118242180
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:811189d72660f95f34538db512a28cda03a6481472b571473a39751393e425ed
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68eb61321f52d319beb3b0aea14b956dcbfbc7a51e4307bb6194168d92337ac6
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1cd8f88a9cad614bc2f0545599431a338717269118bde820f7a2ebef8c3d177
|
| 3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5a601c907d442bfc98aecb4b414394e5a711f038902c4eca06616f9bff709ff
|
| 3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90712adf282f1d59d322b20c807b46ede7a0fd0f43d34d14f6b8d9ee4800700e
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -7587,11 +7587,854 @@
|
|
| 7587 |
"eval_samples_per_second": 966.392,
|
| 7588 |
"eval_steps_per_second": 40.267,
|
| 7589 |
"step": 625257
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7590 |
}
|
| 7591 |
],
|
| 7592 |
"max_steps": 972622,
|
| 7593 |
"num_train_epochs": 14,
|
| 7594 |
-
"total_flos":
|
| 7595 |
"trial_name": null,
|
| 7596 |
"trial_params": null
|
| 7597 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 10.0,
|
| 5 |
+
"global_step": 694730,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 7587 |
"eval_samples_per_second": 966.392,
|
| 7588 |
"eval_steps_per_second": 40.267,
|
| 7589 |
"step": 625257
|
| 7590 |
+
},
|
| 7591 |
+
{
|
| 7592 |
+
"epoch": 9.0,
|
| 7593 |
+
"learning_rate": 3.6078492462833434e-05,
|
| 7594 |
+
"loss": 2.84,
|
| 7595 |
+
"step": 625500
|
| 7596 |
+
},
|
| 7597 |
+
{
|
| 7598 |
+
"epoch": 9.01,
|
| 7599 |
+
"learning_rate": 3.60265657210807e-05,
|
| 7600 |
+
"loss": 2.8314,
|
| 7601 |
+
"step": 626000
|
| 7602 |
+
},
|
| 7603 |
+
{
|
| 7604 |
+
"epoch": 9.02,
|
| 7605 |
+
"learning_rate": 3.597463897932797e-05,
|
| 7606 |
+
"loss": 2.8356,
|
| 7607 |
+
"step": 626500
|
| 7608 |
+
},
|
| 7609 |
+
{
|
| 7610 |
+
"epoch": 9.03,
|
| 7611 |
+
"learning_rate": 3.592271223757523e-05,
|
| 7612 |
+
"loss": 2.8391,
|
| 7613 |
+
"step": 627000
|
| 7614 |
+
},
|
| 7615 |
+
{
|
| 7616 |
+
"epoch": 9.03,
|
| 7617 |
+
"learning_rate": 3.5870785495822494e-05,
|
| 7618 |
+
"loss": 2.8317,
|
| 7619 |
+
"step": 627500
|
| 7620 |
+
},
|
| 7621 |
+
{
|
| 7622 |
+
"epoch": 9.04,
|
| 7623 |
+
"learning_rate": 3.581896260755327e-05,
|
| 7624 |
+
"loss": 2.8298,
|
| 7625 |
+
"step": 628000
|
| 7626 |
+
},
|
| 7627 |
+
{
|
| 7628 |
+
"epoch": 9.05,
|
| 7629 |
+
"learning_rate": 3.576703586580053e-05,
|
| 7630 |
+
"loss": 2.8356,
|
| 7631 |
+
"step": 628500
|
| 7632 |
+
},
|
| 7633 |
+
{
|
| 7634 |
+
"epoch": 9.05,
|
| 7635 |
+
"learning_rate": 3.5715109124047796e-05,
|
| 7636 |
+
"loss": 2.8319,
|
| 7637 |
+
"step": 629000
|
| 7638 |
+
},
|
| 7639 |
+
{
|
| 7640 |
+
"epoch": 9.06,
|
| 7641 |
+
"learning_rate": 3.566318238229506e-05,
|
| 7642 |
+
"loss": 2.8361,
|
| 7643 |
+
"step": 629500
|
| 7644 |
+
},
|
| 7645 |
+
{
|
| 7646 |
+
"epoch": 9.07,
|
| 7647 |
+
"learning_rate": 3.5611255640542326e-05,
|
| 7648 |
+
"loss": 2.8352,
|
| 7649 |
+
"step": 630000
|
| 7650 |
+
},
|
| 7651 |
+
{
|
| 7652 |
+
"epoch": 9.08,
|
| 7653 |
+
"learning_rate": 3.555932889878959e-05,
|
| 7654 |
+
"loss": 2.8362,
|
| 7655 |
+
"step": 630500
|
| 7656 |
+
},
|
| 7657 |
+
{
|
| 7658 |
+
"epoch": 9.08,
|
| 7659 |
+
"learning_rate": 3.5507402157036856e-05,
|
| 7660 |
+
"loss": 2.838,
|
| 7661 |
+
"step": 631000
|
| 7662 |
+
},
|
| 7663 |
+
{
|
| 7664 |
+
"epoch": 9.09,
|
| 7665 |
+
"learning_rate": 3.545547541528412e-05,
|
| 7666 |
+
"loss": 2.8323,
|
| 7667 |
+
"step": 631500
|
| 7668 |
+
},
|
| 7669 |
+
{
|
| 7670 |
+
"epoch": 9.1,
|
| 7671 |
+
"learning_rate": 3.540365252701489e-05,
|
| 7672 |
+
"loss": 2.8342,
|
| 7673 |
+
"step": 632000
|
| 7674 |
+
},
|
| 7675 |
+
{
|
| 7676 |
+
"epoch": 9.1,
|
| 7677 |
+
"learning_rate": 3.535172578526215e-05,
|
| 7678 |
+
"loss": 2.8374,
|
| 7679 |
+
"step": 632500
|
| 7680 |
+
},
|
| 7681 |
+
{
|
| 7682 |
+
"epoch": 9.11,
|
| 7683 |
+
"learning_rate": 3.5299902896992925e-05,
|
| 7684 |
+
"loss": 2.835,
|
| 7685 |
+
"step": 633000
|
| 7686 |
+
},
|
| 7687 |
+
{
|
| 7688 |
+
"epoch": 9.12,
|
| 7689 |
+
"learning_rate": 3.524797615524019e-05,
|
| 7690 |
+
"loss": 2.8346,
|
| 7691 |
+
"step": 633500
|
| 7692 |
+
},
|
| 7693 |
+
{
|
| 7694 |
+
"epoch": 9.13,
|
| 7695 |
+
"learning_rate": 3.5196049413487455e-05,
|
| 7696 |
+
"loss": 2.8382,
|
| 7697 |
+
"step": 634000
|
| 7698 |
+
},
|
| 7699 |
+
{
|
| 7700 |
+
"epoch": 9.13,
|
| 7701 |
+
"learning_rate": 3.5144122671734716e-05,
|
| 7702 |
+
"loss": 2.8309,
|
| 7703 |
+
"step": 634500
|
| 7704 |
+
},
|
| 7705 |
+
{
|
| 7706 |
+
"epoch": 9.14,
|
| 7707 |
+
"learning_rate": 3.5092195929981985e-05,
|
| 7708 |
+
"loss": 2.8379,
|
| 7709 |
+
"step": 635000
|
| 7710 |
+
},
|
| 7711 |
+
{
|
| 7712 |
+
"epoch": 9.15,
|
| 7713 |
+
"learning_rate": 3.504037304171275e-05,
|
| 7714 |
+
"loss": 2.8338,
|
| 7715 |
+
"step": 635500
|
| 7716 |
+
},
|
| 7717 |
+
{
|
| 7718 |
+
"epoch": 9.15,
|
| 7719 |
+
"learning_rate": 3.498844629996002e-05,
|
| 7720 |
+
"loss": 2.8332,
|
| 7721 |
+
"step": 636000
|
| 7722 |
+
},
|
| 7723 |
+
{
|
| 7724 |
+
"epoch": 9.16,
|
| 7725 |
+
"learning_rate": 3.493651955820728e-05,
|
| 7726 |
+
"loss": 2.8359,
|
| 7727 |
+
"step": 636500
|
| 7728 |
+
},
|
| 7729 |
+
{
|
| 7730 |
+
"epoch": 9.17,
|
| 7731 |
+
"learning_rate": 3.488459281645455e-05,
|
| 7732 |
+
"loss": 2.8306,
|
| 7733 |
+
"step": 637000
|
| 7734 |
+
},
|
| 7735 |
+
{
|
| 7736 |
+
"epoch": 9.18,
|
| 7737 |
+
"learning_rate": 3.483266607470182e-05,
|
| 7738 |
+
"loss": 2.839,
|
| 7739 |
+
"step": 637500
|
| 7740 |
+
},
|
| 7741 |
+
{
|
| 7742 |
+
"epoch": 9.18,
|
| 7743 |
+
"learning_rate": 3.478073933294907e-05,
|
| 7744 |
+
"loss": 2.8349,
|
| 7745 |
+
"step": 638000
|
| 7746 |
+
},
|
| 7747 |
+
{
|
| 7748 |
+
"epoch": 9.19,
|
| 7749 |
+
"learning_rate": 3.472881259119634e-05,
|
| 7750 |
+
"loss": 2.8302,
|
| 7751 |
+
"step": 638500
|
| 7752 |
+
},
|
| 7753 |
+
{
|
| 7754 |
+
"epoch": 9.2,
|
| 7755 |
+
"learning_rate": 3.467698970292711e-05,
|
| 7756 |
+
"loss": 2.828,
|
| 7757 |
+
"step": 639000
|
| 7758 |
+
},
|
| 7759 |
+
{
|
| 7760 |
+
"epoch": 9.21,
|
| 7761 |
+
"learning_rate": 3.4625062961174375e-05,
|
| 7762 |
+
"loss": 2.8344,
|
| 7763 |
+
"step": 639500
|
| 7764 |
+
},
|
| 7765 |
+
{
|
| 7766 |
+
"epoch": 9.21,
|
| 7767 |
+
"learning_rate": 3.457313621942164e-05,
|
| 7768 |
+
"loss": 2.8365,
|
| 7769 |
+
"step": 640000
|
| 7770 |
+
},
|
| 7771 |
+
{
|
| 7772 |
+
"epoch": 9.22,
|
| 7773 |
+
"learning_rate": 3.452120947766891e-05,
|
| 7774 |
+
"loss": 2.8357,
|
| 7775 |
+
"step": 640500
|
| 7776 |
+
},
|
| 7777 |
+
{
|
| 7778 |
+
"epoch": 9.23,
|
| 7779 |
+
"learning_rate": 3.4469282735916166e-05,
|
| 7780 |
+
"loss": 2.8318,
|
| 7781 |
+
"step": 641000
|
| 7782 |
+
},
|
| 7783 |
+
{
|
| 7784 |
+
"epoch": 9.23,
|
| 7785 |
+
"learning_rate": 3.4417459847646946e-05,
|
| 7786 |
+
"loss": 2.8369,
|
| 7787 |
+
"step": 641500
|
| 7788 |
+
},
|
| 7789 |
+
{
|
| 7790 |
+
"epoch": 9.24,
|
| 7791 |
+
"learning_rate": 3.436553310589421e-05,
|
| 7792 |
+
"loss": 2.8379,
|
| 7793 |
+
"step": 642000
|
| 7794 |
+
},
|
| 7795 |
+
{
|
| 7796 |
+
"epoch": 9.25,
|
| 7797 |
+
"learning_rate": 3.431360636414147e-05,
|
| 7798 |
+
"loss": 2.8327,
|
| 7799 |
+
"step": 642500
|
| 7800 |
+
},
|
| 7801 |
+
{
|
| 7802 |
+
"epoch": 9.26,
|
| 7803 |
+
"learning_rate": 3.426167962238874e-05,
|
| 7804 |
+
"loss": 2.8311,
|
| 7805 |
+
"step": 643000
|
| 7806 |
+
},
|
| 7807 |
+
{
|
| 7808 |
+
"epoch": 9.26,
|
| 7809 |
+
"learning_rate": 3.4209752880636e-05,
|
| 7810 |
+
"loss": 2.8275,
|
| 7811 |
+
"step": 643500
|
| 7812 |
+
},
|
| 7813 |
+
{
|
| 7814 |
+
"epoch": 9.27,
|
| 7815 |
+
"learning_rate": 3.415782613888327e-05,
|
| 7816 |
+
"loss": 2.834,
|
| 7817 |
+
"step": 644000
|
| 7818 |
+
},
|
| 7819 |
+
{
|
| 7820 |
+
"epoch": 9.28,
|
| 7821 |
+
"learning_rate": 3.410589939713053e-05,
|
| 7822 |
+
"loss": 2.8364,
|
| 7823 |
+
"step": 644500
|
| 7824 |
+
},
|
| 7825 |
+
{
|
| 7826 |
+
"epoch": 9.28,
|
| 7827 |
+
"learning_rate": 3.40540765088613e-05,
|
| 7828 |
+
"loss": 2.832,
|
| 7829 |
+
"step": 645000
|
| 7830 |
+
},
|
| 7831 |
+
{
|
| 7832 |
+
"epoch": 9.29,
|
| 7833 |
+
"learning_rate": 3.400214976710856e-05,
|
| 7834 |
+
"loss": 2.8355,
|
| 7835 |
+
"step": 645500
|
| 7836 |
+
},
|
| 7837 |
+
{
|
| 7838 |
+
"epoch": 9.3,
|
| 7839 |
+
"learning_rate": 3.395022302535583e-05,
|
| 7840 |
+
"loss": 2.8331,
|
| 7841 |
+
"step": 646000
|
| 7842 |
+
},
|
| 7843 |
+
{
|
| 7844 |
+
"epoch": 9.31,
|
| 7845 |
+
"learning_rate": 3.389829628360309e-05,
|
| 7846 |
+
"loss": 2.833,
|
| 7847 |
+
"step": 646500
|
| 7848 |
+
},
|
| 7849 |
+
{
|
| 7850 |
+
"epoch": 9.31,
|
| 7851 |
+
"learning_rate": 3.3846473395333866e-05,
|
| 7852 |
+
"loss": 2.8313,
|
| 7853 |
+
"step": 647000
|
| 7854 |
+
},
|
| 7855 |
+
{
|
| 7856 |
+
"epoch": 9.32,
|
| 7857 |
+
"learning_rate": 3.379454665358113e-05,
|
| 7858 |
+
"loss": 2.8312,
|
| 7859 |
+
"step": 647500
|
| 7860 |
+
},
|
| 7861 |
+
{
|
| 7862 |
+
"epoch": 9.33,
|
| 7863 |
+
"learning_rate": 3.3742619911828396e-05,
|
| 7864 |
+
"loss": 2.8333,
|
| 7865 |
+
"step": 648000
|
| 7866 |
+
},
|
| 7867 |
+
{
|
| 7868 |
+
"epoch": 9.33,
|
| 7869 |
+
"learning_rate": 3.369069317007566e-05,
|
| 7870 |
+
"loss": 2.8306,
|
| 7871 |
+
"step": 648500
|
| 7872 |
+
},
|
| 7873 |
+
{
|
| 7874 |
+
"epoch": 9.34,
|
| 7875 |
+
"learning_rate": 3.363887028180643e-05,
|
| 7876 |
+
"loss": 2.832,
|
| 7877 |
+
"step": 649000
|
| 7878 |
+
},
|
| 7879 |
+
{
|
| 7880 |
+
"epoch": 9.35,
|
| 7881 |
+
"learning_rate": 3.358694354005369e-05,
|
| 7882 |
+
"loss": 2.8346,
|
| 7883 |
+
"step": 649500
|
| 7884 |
+
},
|
| 7885 |
+
{
|
| 7886 |
+
"epoch": 9.36,
|
| 7887 |
+
"learning_rate": 3.353501679830096e-05,
|
| 7888 |
+
"loss": 2.8321,
|
| 7889 |
+
"step": 650000
|
| 7890 |
+
},
|
| 7891 |
+
{
|
| 7892 |
+
"epoch": 9.36,
|
| 7893 |
+
"learning_rate": 3.348309005654822e-05,
|
| 7894 |
+
"loss": 2.8266,
|
| 7895 |
+
"step": 650500
|
| 7896 |
+
},
|
| 7897 |
+
{
|
| 7898 |
+
"epoch": 9.37,
|
| 7899 |
+
"learning_rate": 3.343116331479549e-05,
|
| 7900 |
+
"loss": 2.836,
|
| 7901 |
+
"step": 651000
|
| 7902 |
+
},
|
| 7903 |
+
{
|
| 7904 |
+
"epoch": 9.38,
|
| 7905 |
+
"learning_rate": 3.3379340426526256e-05,
|
| 7906 |
+
"loss": 2.8332,
|
| 7907 |
+
"step": 651500
|
| 7908 |
+
},
|
| 7909 |
+
{
|
| 7910 |
+
"epoch": 9.38,
|
| 7911 |
+
"learning_rate": 3.3327413684773525e-05,
|
| 7912 |
+
"loss": 2.8319,
|
| 7913 |
+
"step": 652000
|
| 7914 |
+
},
|
| 7915 |
+
{
|
| 7916 |
+
"epoch": 9.39,
|
| 7917 |
+
"learning_rate": 3.3275486943020786e-05,
|
| 7918 |
+
"loss": 2.8319,
|
| 7919 |
+
"step": 652500
|
| 7920 |
+
},
|
| 7921 |
+
{
|
| 7922 |
+
"epoch": 9.4,
|
| 7923 |
+
"learning_rate": 3.322356020126805e-05,
|
| 7924 |
+
"loss": 2.8284,
|
| 7925 |
+
"step": 653000
|
| 7926 |
+
},
|
| 7927 |
+
{
|
| 7928 |
+
"epoch": 9.41,
|
| 7929 |
+
"learning_rate": 3.3171633459515316e-05,
|
| 7930 |
+
"loss": 2.8259,
|
| 7931 |
+
"step": 653500
|
| 7932 |
+
},
|
| 7933 |
+
{
|
| 7934 |
+
"epoch": 9.41,
|
| 7935 |
+
"learning_rate": 3.3119706717762584e-05,
|
| 7936 |
+
"loss": 2.8288,
|
| 7937 |
+
"step": 654000
|
| 7938 |
+
},
|
| 7939 |
+
{
|
| 7940 |
+
"epoch": 9.42,
|
| 7941 |
+
"learning_rate": 3.306777997600985e-05,
|
| 7942 |
+
"loss": 2.8329,
|
| 7943 |
+
"step": 654500
|
| 7944 |
+
},
|
| 7945 |
+
{
|
| 7946 |
+
"epoch": 9.43,
|
| 7947 |
+
"learning_rate": 3.301585323425711e-05,
|
| 7948 |
+
"loss": 2.8321,
|
| 7949 |
+
"step": 655000
|
| 7950 |
+
},
|
| 7951 |
+
{
|
| 7952 |
+
"epoch": 9.44,
|
| 7953 |
+
"learning_rate": 3.296403034598789e-05,
|
| 7954 |
+
"loss": 2.8312,
|
| 7955 |
+
"step": 655500
|
| 7956 |
+
},
|
| 7957 |
+
{
|
| 7958 |
+
"epoch": 9.44,
|
| 7959 |
+
"learning_rate": 3.291210360423514e-05,
|
| 7960 |
+
"loss": 2.8308,
|
| 7961 |
+
"step": 656000
|
| 7962 |
+
},
|
| 7963 |
+
{
|
| 7964 |
+
"epoch": 9.45,
|
| 7965 |
+
"learning_rate": 3.286017686248241e-05,
|
| 7966 |
+
"loss": 2.8266,
|
| 7967 |
+
"step": 656500
|
| 7968 |
+
},
|
| 7969 |
+
{
|
| 7970 |
+
"epoch": 9.46,
|
| 7971 |
+
"learning_rate": 3.280825012072968e-05,
|
| 7972 |
+
"loss": 2.8317,
|
| 7973 |
+
"step": 657000
|
| 7974 |
+
},
|
| 7975 |
+
{
|
| 7976 |
+
"epoch": 9.46,
|
| 7977 |
+
"learning_rate": 3.2756427232460445e-05,
|
| 7978 |
+
"loss": 2.8337,
|
| 7979 |
+
"step": 657500
|
| 7980 |
+
},
|
| 7981 |
+
{
|
| 7982 |
+
"epoch": 9.47,
|
| 7983 |
+
"learning_rate": 3.270450049070771e-05,
|
| 7984 |
+
"loss": 2.8302,
|
| 7985 |
+
"step": 658000
|
| 7986 |
+
},
|
| 7987 |
+
{
|
| 7988 |
+
"epoch": 9.48,
|
| 7989 |
+
"learning_rate": 3.2652573748954974e-05,
|
| 7990 |
+
"loss": 2.8275,
|
| 7991 |
+
"step": 658500
|
| 7992 |
+
},
|
| 7993 |
+
{
|
| 7994 |
+
"epoch": 9.49,
|
| 7995 |
+
"learning_rate": 3.260064700720224e-05,
|
| 7996 |
+
"loss": 2.8284,
|
| 7997 |
+
"step": 659000
|
| 7998 |
+
},
|
| 7999 |
+
{
|
| 8000 |
+
"epoch": 9.49,
|
| 8001 |
+
"learning_rate": 3.2548824118933016e-05,
|
| 8002 |
+
"loss": 2.8285,
|
| 8003 |
+
"step": 659500
|
| 8004 |
+
},
|
| 8005 |
+
{
|
| 8006 |
+
"epoch": 9.5,
|
| 8007 |
+
"learning_rate": 3.249689737718028e-05,
|
| 8008 |
+
"loss": 2.8306,
|
| 8009 |
+
"step": 660000
|
| 8010 |
+
},
|
| 8011 |
+
{
|
| 8012 |
+
"epoch": 9.51,
|
| 8013 |
+
"learning_rate": 3.244497063542754e-05,
|
| 8014 |
+
"loss": 2.8324,
|
| 8015 |
+
"step": 660500
|
| 8016 |
+
},
|
| 8017 |
+
{
|
| 8018 |
+
"epoch": 9.51,
|
| 8019 |
+
"learning_rate": 3.239304389367481e-05,
|
| 8020 |
+
"loss": 2.8272,
|
| 8021 |
+
"step": 661000
|
| 8022 |
+
},
|
| 8023 |
+
{
|
| 8024 |
+
"epoch": 9.52,
|
| 8025 |
+
"learning_rate": 3.234111715192207e-05,
|
| 8026 |
+
"loss": 2.8337,
|
| 8027 |
+
"step": 661500
|
| 8028 |
+
},
|
| 8029 |
+
{
|
| 8030 |
+
"epoch": 9.53,
|
| 8031 |
+
"learning_rate": 3.228919041016934e-05,
|
| 8032 |
+
"loss": 2.8277,
|
| 8033 |
+
"step": 662000
|
| 8034 |
+
},
|
| 8035 |
+
{
|
| 8036 |
+
"epoch": 9.54,
|
| 8037 |
+
"learning_rate": 3.22372636684166e-05,
|
| 8038 |
+
"loss": 2.8251,
|
| 8039 |
+
"step": 662500
|
| 8040 |
+
},
|
| 8041 |
+
{
|
| 8042 |
+
"epoch": 9.54,
|
| 8043 |
+
"learning_rate": 3.218544078014737e-05,
|
| 8044 |
+
"loss": 2.831,
|
| 8045 |
+
"step": 663000
|
| 8046 |
+
},
|
| 8047 |
+
{
|
| 8048 |
+
"epoch": 9.55,
|
| 8049 |
+
"learning_rate": 3.213351403839463e-05,
|
| 8050 |
+
"loss": 2.8286,
|
| 8051 |
+
"step": 663500
|
| 8052 |
+
},
|
| 8053 |
+
{
|
| 8054 |
+
"epoch": 9.56,
|
| 8055 |
+
"learning_rate": 3.20815872966419e-05,
|
| 8056 |
+
"loss": 2.8298,
|
| 8057 |
+
"step": 664000
|
| 8058 |
+
},
|
| 8059 |
+
{
|
| 8060 |
+
"epoch": 9.56,
|
| 8061 |
+
"learning_rate": 3.202966055488916e-05,
|
| 8062 |
+
"loss": 2.8234,
|
| 8063 |
+
"step": 664500
|
| 8064 |
+
},
|
| 8065 |
+
{
|
| 8066 |
+
"epoch": 9.57,
|
| 8067 |
+
"learning_rate": 3.197773381313643e-05,
|
| 8068 |
+
"loss": 2.8285,
|
| 8069 |
+
"step": 665000
|
| 8070 |
+
},
|
| 8071 |
+
{
|
| 8072 |
+
"epoch": 9.58,
|
| 8073 |
+
"learning_rate": 3.192580707138369e-05,
|
| 8074 |
+
"loss": 2.8285,
|
| 8075 |
+
"step": 665500
|
| 8076 |
+
},
|
| 8077 |
+
{
|
| 8078 |
+
"epoch": 9.59,
|
| 8079 |
+
"learning_rate": 3.1873880329630954e-05,
|
| 8080 |
+
"loss": 2.8288,
|
| 8081 |
+
"step": 666000
|
| 8082 |
+
},
|
| 8083 |
+
{
|
| 8084 |
+
"epoch": 9.59,
|
| 8085 |
+
"learning_rate": 3.182195358787822e-05,
|
| 8086 |
+
"loss": 2.8278,
|
| 8087 |
+
"step": 666500
|
| 8088 |
+
},
|
| 8089 |
+
{
|
| 8090 |
+
"epoch": 9.6,
|
| 8091 |
+
"learning_rate": 3.177013069960899e-05,
|
| 8092 |
+
"loss": 2.8275,
|
| 8093 |
+
"step": 667000
|
| 8094 |
+
},
|
| 8095 |
+
{
|
| 8096 |
+
"epoch": 9.61,
|
| 8097 |
+
"learning_rate": 3.171830781133976e-05,
|
| 8098 |
+
"loss": 2.8216,
|
| 8099 |
+
"step": 667500
|
| 8100 |
+
},
|
| 8101 |
+
{
|
| 8102 |
+
"epoch": 9.62,
|
| 8103 |
+
"learning_rate": 3.166638106958702e-05,
|
| 8104 |
+
"loss": 2.8224,
|
| 8105 |
+
"step": 668000
|
| 8106 |
+
},
|
| 8107 |
+
{
|
| 8108 |
+
"epoch": 9.62,
|
| 8109 |
+
"learning_rate": 3.161445432783429e-05,
|
| 8110 |
+
"loss": 2.8297,
|
| 8111 |
+
"step": 668500
|
| 8112 |
+
},
|
| 8113 |
+
{
|
| 8114 |
+
"epoch": 9.63,
|
| 8115 |
+
"learning_rate": 3.156252758608156e-05,
|
| 8116 |
+
"loss": 2.827,
|
| 8117 |
+
"step": 669000
|
| 8118 |
+
},
|
| 8119 |
+
{
|
| 8120 |
+
"epoch": 9.64,
|
| 8121 |
+
"learning_rate": 3.151060084432883e-05,
|
| 8122 |
+
"loss": 2.8302,
|
| 8123 |
+
"step": 669500
|
| 8124 |
+
},
|
| 8125 |
+
{
|
| 8126 |
+
"epoch": 9.64,
|
| 8127 |
+
"learning_rate": 3.1458777956059594e-05,
|
| 8128 |
+
"loss": 2.8289,
|
| 8129 |
+
"step": 670000
|
| 8130 |
+
},
|
| 8131 |
+
{
|
| 8132 |
+
"epoch": 9.65,
|
| 8133 |
+
"learning_rate": 3.140685121430686e-05,
|
| 8134 |
+
"loss": 2.8283,
|
| 8135 |
+
"step": 670500
|
| 8136 |
+
},
|
| 8137 |
+
{
|
| 8138 |
+
"epoch": 9.66,
|
| 8139 |
+
"learning_rate": 3.135492447255412e-05,
|
| 8140 |
+
"loss": 2.824,
|
| 8141 |
+
"step": 671000
|
| 8142 |
+
},
|
| 8143 |
+
{
|
| 8144 |
+
"epoch": 9.67,
|
| 8145 |
+
"learning_rate": 3.1302997730801385e-05,
|
| 8146 |
+
"loss": 2.825,
|
| 8147 |
+
"step": 671500
|
| 8148 |
+
},
|
| 8149 |
+
{
|
| 8150 |
+
"epoch": 9.67,
|
| 8151 |
+
"learning_rate": 3.125117484253215e-05,
|
| 8152 |
+
"loss": 2.8198,
|
| 8153 |
+
"step": 672000
|
| 8154 |
+
},
|
| 8155 |
+
{
|
| 8156 |
+
"epoch": 9.68,
|
| 8157 |
+
"learning_rate": 3.119924810077942e-05,
|
| 8158 |
+
"loss": 2.8254,
|
| 8159 |
+
"step": 672500
|
| 8160 |
+
},
|
| 8161 |
+
{
|
| 8162 |
+
"epoch": 9.69,
|
| 8163 |
+
"learning_rate": 3.114732135902669e-05,
|
| 8164 |
+
"loss": 2.8216,
|
| 8165 |
+
"step": 673000
|
| 8166 |
+
},
|
| 8167 |
+
{
|
| 8168 |
+
"epoch": 9.69,
|
| 8169 |
+
"learning_rate": 3.1095394617273957e-05,
|
| 8170 |
+
"loss": 2.8273,
|
| 8171 |
+
"step": 673500
|
| 8172 |
+
},
|
| 8173 |
+
{
|
| 8174 |
+
"epoch": 9.7,
|
| 8175 |
+
"learning_rate": 3.104346787552122e-05,
|
| 8176 |
+
"loss": 2.8231,
|
| 8177 |
+
"step": 674000
|
| 8178 |
+
},
|
| 8179 |
+
{
|
| 8180 |
+
"epoch": 9.71,
|
| 8181 |
+
"learning_rate": 3.099154113376848e-05,
|
| 8182 |
+
"loss": 2.8267,
|
| 8183 |
+
"step": 674500
|
| 8184 |
+
},
|
| 8185 |
+
{
|
| 8186 |
+
"epoch": 9.72,
|
| 8187 |
+
"learning_rate": 3.093971824549925e-05,
|
| 8188 |
+
"loss": 2.8241,
|
| 8189 |
+
"step": 675000
|
| 8190 |
+
},
|
| 8191 |
+
{
|
| 8192 |
+
"epoch": 9.72,
|
| 8193 |
+
"learning_rate": 3.0887791503746514e-05,
|
| 8194 |
+
"loss": 2.8218,
|
| 8195 |
+
"step": 675500
|
| 8196 |
+
},
|
| 8197 |
+
{
|
| 8198 |
+
"epoch": 9.73,
|
| 8199 |
+
"learning_rate": 3.083586476199378e-05,
|
| 8200 |
+
"loss": 2.8262,
|
| 8201 |
+
"step": 676000
|
| 8202 |
+
},
|
| 8203 |
+
{
|
| 8204 |
+
"epoch": 9.74,
|
| 8205 |
+
"learning_rate": 3.0783938020241044e-05,
|
| 8206 |
+
"loss": 2.8207,
|
| 8207 |
+
"step": 676500
|
| 8208 |
+
},
|
| 8209 |
+
{
|
| 8210 |
+
"epoch": 9.74,
|
| 8211 |
+
"learning_rate": 3.073201127848831e-05,
|
| 8212 |
+
"loss": 2.8265,
|
| 8213 |
+
"step": 677000
|
| 8214 |
+
},
|
| 8215 |
+
{
|
| 8216 |
+
"epoch": 9.75,
|
| 8217 |
+
"learning_rate": 3.068018839021908e-05,
|
| 8218 |
+
"loss": 2.8176,
|
| 8219 |
+
"step": 677500
|
| 8220 |
+
},
|
| 8221 |
+
{
|
| 8222 |
+
"epoch": 9.76,
|
| 8223 |
+
"learning_rate": 3.062826164846635e-05,
|
| 8224 |
+
"loss": 2.8262,
|
| 8225 |
+
"step": 678000
|
| 8226 |
+
},
|
| 8227 |
+
{
|
| 8228 |
+
"epoch": 9.77,
|
| 8229 |
+
"learning_rate": 3.057633490671361e-05,
|
| 8230 |
+
"loss": 2.8195,
|
| 8231 |
+
"step": 678500
|
| 8232 |
+
},
|
| 8233 |
+
{
|
| 8234 |
+
"epoch": 9.77,
|
| 8235 |
+
"learning_rate": 3.0524408164960877e-05,
|
| 8236 |
+
"loss": 2.8232,
|
| 8237 |
+
"step": 679000
|
| 8238 |
+
},
|
| 8239 |
+
{
|
| 8240 |
+
"epoch": 9.78,
|
| 8241 |
+
"learning_rate": 3.0472481423208138e-05,
|
| 8242 |
+
"loss": 2.8231,
|
| 8243 |
+
"step": 679500
|
| 8244 |
+
},
|
| 8245 |
+
{
|
| 8246 |
+
"epoch": 9.79,
|
| 8247 |
+
"learning_rate": 3.0420554681455403e-05,
|
| 8248 |
+
"loss": 2.8215,
|
| 8249 |
+
"step": 680000
|
| 8250 |
+
},
|
| 8251 |
+
{
|
| 8252 |
+
"epoch": 9.8,
|
| 8253 |
+
"learning_rate": 3.036862793970267e-05,
|
| 8254 |
+
"loss": 2.825,
|
| 8255 |
+
"step": 680500
|
| 8256 |
+
},
|
| 8257 |
+
{
|
| 8258 |
+
"epoch": 9.8,
|
| 8259 |
+
"learning_rate": 3.0316701197949933e-05,
|
| 8260 |
+
"loss": 2.8225,
|
| 8261 |
+
"step": 681000
|
| 8262 |
+
},
|
| 8263 |
+
{
|
| 8264 |
+
"epoch": 9.81,
|
| 8265 |
+
"learning_rate": 3.0264878309680706e-05,
|
| 8266 |
+
"loss": 2.8144,
|
| 8267 |
+
"step": 681500
|
| 8268 |
+
},
|
| 8269 |
+
{
|
| 8270 |
+
"epoch": 9.82,
|
| 8271 |
+
"learning_rate": 3.0212951567927967e-05,
|
| 8272 |
+
"loss": 2.8204,
|
| 8273 |
+
"step": 682000
|
| 8274 |
+
},
|
| 8275 |
+
{
|
| 8276 |
+
"epoch": 9.82,
|
| 8277 |
+
"learning_rate": 3.0161024826175232e-05,
|
| 8278 |
+
"loss": 2.8276,
|
| 8279 |
+
"step": 682500
|
| 8280 |
+
},
|
| 8281 |
+
{
|
| 8282 |
+
"epoch": 9.83,
|
| 8283 |
+
"learning_rate": 3.01090980844225e-05,
|
| 8284 |
+
"loss": 2.8234,
|
| 8285 |
+
"step": 683000
|
| 8286 |
+
},
|
| 8287 |
+
{
|
| 8288 |
+
"epoch": 9.84,
|
| 8289 |
+
"learning_rate": 3.0057379049636775e-05,
|
| 8290 |
+
"loss": 2.8245,
|
| 8291 |
+
"step": 683500
|
| 8292 |
+
},
|
| 8293 |
+
{
|
| 8294 |
+
"epoch": 9.85,
|
| 8295 |
+
"learning_rate": 3.000545230788404e-05,
|
| 8296 |
+
"loss": 2.8231,
|
| 8297 |
+
"step": 684000
|
| 8298 |
+
},
|
| 8299 |
+
{
|
| 8300 |
+
"epoch": 9.85,
|
| 8301 |
+
"learning_rate": 2.99535255661313e-05,
|
| 8302 |
+
"loss": 2.8219,
|
| 8303 |
+
"step": 684500
|
| 8304 |
+
},
|
| 8305 |
+
{
|
| 8306 |
+
"epoch": 9.86,
|
| 8307 |
+
"learning_rate": 2.990159882437857e-05,
|
| 8308 |
+
"loss": 2.821,
|
| 8309 |
+
"step": 685000
|
| 8310 |
+
},
|
| 8311 |
+
{
|
| 8312 |
+
"epoch": 9.87,
|
| 8313 |
+
"learning_rate": 2.9849672082625835e-05,
|
| 8314 |
+
"loss": 2.8236,
|
| 8315 |
+
"step": 685500
|
| 8316 |
+
},
|
| 8317 |
+
{
|
| 8318 |
+
"epoch": 9.87,
|
| 8319 |
+
"learning_rate": 2.9797745340873096e-05,
|
| 8320 |
+
"loss": 2.8212,
|
| 8321 |
+
"step": 686000
|
| 8322 |
+
},
|
| 8323 |
+
{
|
| 8324 |
+
"epoch": 9.88,
|
| 8325 |
+
"learning_rate": 2.974581859912036e-05,
|
| 8326 |
+
"loss": 2.8231,
|
| 8327 |
+
"step": 686500
|
| 8328 |
+
},
|
| 8329 |
+
{
|
| 8330 |
+
"epoch": 9.89,
|
| 8331 |
+
"learning_rate": 2.969399571085113e-05,
|
| 8332 |
+
"loss": 2.8228,
|
| 8333 |
+
"step": 687000
|
| 8334 |
+
},
|
| 8335 |
+
{
|
| 8336 |
+
"epoch": 9.9,
|
| 8337 |
+
"learning_rate": 2.9642068969098395e-05,
|
| 8338 |
+
"loss": 2.8205,
|
| 8339 |
+
"step": 687500
|
| 8340 |
+
},
|
| 8341 |
+
{
|
| 8342 |
+
"epoch": 9.9,
|
| 8343 |
+
"learning_rate": 2.9590142227345664e-05,
|
| 8344 |
+
"loss": 2.8207,
|
| 8345 |
+
"step": 688000
|
| 8346 |
+
},
|
| 8347 |
+
{
|
| 8348 |
+
"epoch": 9.91,
|
| 8349 |
+
"learning_rate": 2.953821548559293e-05,
|
| 8350 |
+
"loss": 2.8209,
|
| 8351 |
+
"step": 688500
|
| 8352 |
+
},
|
| 8353 |
+
{
|
| 8354 |
+
"epoch": 9.92,
|
| 8355 |
+
"learning_rate": 2.948628874384019e-05,
|
| 8356 |
+
"loss": 2.8211,
|
| 8357 |
+
"step": 689000
|
| 8358 |
+
},
|
| 8359 |
+
{
|
| 8360 |
+
"epoch": 9.92,
|
| 8361 |
+
"learning_rate": 2.9434362002087455e-05,
|
| 8362 |
+
"loss": 2.8211,
|
| 8363 |
+
"step": 689500
|
| 8364 |
+
},
|
| 8365 |
+
{
|
| 8366 |
+
"epoch": 9.93,
|
| 8367 |
+
"learning_rate": 2.9382435260334723e-05,
|
| 8368 |
+
"loss": 2.8194,
|
| 8369 |
+
"step": 690000
|
| 8370 |
+
},
|
| 8371 |
+
{
|
| 8372 |
+
"epoch": 9.94,
|
| 8373 |
+
"learning_rate": 2.9330508518581985e-05,
|
| 8374 |
+
"loss": 2.8241,
|
| 8375 |
+
"step": 690500
|
| 8376 |
+
},
|
| 8377 |
+
{
|
| 8378 |
+
"epoch": 9.95,
|
| 8379 |
+
"learning_rate": 2.9278685630312758e-05,
|
| 8380 |
+
"loss": 2.8171,
|
| 8381 |
+
"step": 691000
|
| 8382 |
+
},
|
| 8383 |
+
{
|
| 8384 |
+
"epoch": 9.95,
|
| 8385 |
+
"learning_rate": 2.922675888856002e-05,
|
| 8386 |
+
"loss": 2.8193,
|
| 8387 |
+
"step": 691500
|
| 8388 |
+
},
|
| 8389 |
+
{
|
| 8390 |
+
"epoch": 9.96,
|
| 8391 |
+
"learning_rate": 2.9174832146807284e-05,
|
| 8392 |
+
"loss": 2.8213,
|
| 8393 |
+
"step": 692000
|
| 8394 |
+
},
|
| 8395 |
+
{
|
| 8396 |
+
"epoch": 9.97,
|
| 8397 |
+
"learning_rate": 2.9122905405054553e-05,
|
| 8398 |
+
"loss": 2.8249,
|
| 8399 |
+
"step": 692500
|
| 8400 |
+
},
|
| 8401 |
+
{
|
| 8402 |
+
"epoch": 9.98,
|
| 8403 |
+
"learning_rate": 2.9070978663301818e-05,
|
| 8404 |
+
"loss": 2.8247,
|
| 8405 |
+
"step": 693000
|
| 8406 |
+
},
|
| 8407 |
+
{
|
| 8408 |
+
"epoch": 9.98,
|
| 8409 |
+
"learning_rate": 2.9019155775032587e-05,
|
| 8410 |
+
"loss": 2.8203,
|
| 8411 |
+
"step": 693500
|
| 8412 |
+
},
|
| 8413 |
+
{
|
| 8414 |
+
"epoch": 9.99,
|
| 8415 |
+
"learning_rate": 2.8967229033279852e-05,
|
| 8416 |
+
"loss": 2.8256,
|
| 8417 |
+
"step": 694000
|
| 8418 |
+
},
|
| 8419 |
+
{
|
| 8420 |
+
"epoch": 10.0,
|
| 8421 |
+
"learning_rate": 2.8915302291527114e-05,
|
| 8422 |
+
"loss": 2.8163,
|
| 8423 |
+
"step": 694500
|
| 8424 |
+
},
|
| 8425 |
+
{
|
| 8426 |
+
"epoch": 10.0,
|
| 8427 |
+
"eval_accuracy": 0.5010026952779273,
|
| 8428 |
+
"eval_loss": 2.650995969772339,
|
| 8429 |
+
"eval_runtime": 555.6978,
|
| 8430 |
+
"eval_samples_per_second": 969.838,
|
| 8431 |
+
"eval_steps_per_second": 40.41,
|
| 8432 |
+
"step": 694730
|
| 8433 |
}
|
| 8434 |
],
|
| 8435 |
"max_steps": 972622,
|
| 8436 |
"num_train_epochs": 14,
|
| 8437 |
+
"total_flos": 4.2724234309696225e+18,
|
| 8438 |
"trial_name": null,
|
| 8439 |
"trial_params": null
|
| 8440 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 118242180
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:020c47f09229e7b4397da1597dd814d8bad9db375a9c2b2366593d834ff17bb0
|
| 3 |
size 118242180
|
runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25c4e9beee823a5935e50174eb1417fc4c243aa090bff6d0ab7ea1c65de8bda8
|
| 3 |
+
size 228856
|