Training in progress, epoch 14
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +846 -3
- pytorch_model.bin +1 -1
- runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 236469913
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f909ba46f0a48bc129a704d4f24cd179ebf4211739e8aa4c8a085db9561d2870
|
| 3 |
size 236469913
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 118242180
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae1fe310f1f9e883c23d6725718feca058676978792d4f4de4c8dbbe2df9c17e
|
| 3 |
size 118242180
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e4b49c3d9f3fe57b8e1f1b69c51f85df70aed9ab2c7da43cab2725d7c8ce2a3
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ae897e79b48da1251412a7aa5215b19754cf4f6407daccf2cbcc48d6b54e5a8
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d33ec1b8096db043dca5e9616e048b70e27d0805be063f9fe36f9c6a19119ff8
|
| 3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a0937b93f49d7efd83aa7fa86bb2e746d0887803a2bd13587895ce811116ea1
|
| 3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca9ecaac6bfa92bcf5ac19f559074b5567002f2b1aba65ed23951aa76e1f3154
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -10959,11 +10959,854 @@
|
|
| 10959 |
"eval_samples_per_second": 971.361,
|
| 10960 |
"eval_steps_per_second": 40.474,
|
| 10961 |
"step": 903149
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10962 |
}
|
| 10963 |
],
|
| 10964 |
"max_steps": 972622,
|
| 10965 |
"num_train_epochs": 14,
|
| 10966 |
-
"total_flos": 5.
|
| 10967 |
"trial_name": null,
|
| 10968 |
"trial_params": null
|
| 10969 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 14.0,
|
| 5 |
+
"global_step": 972622,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 10959 |
"eval_samples_per_second": 971.361,
|
| 10960 |
"eval_steps_per_second": 40.474,
|
| 10961 |
"step": 903149
|
| 10962 |
+
},
|
| 10963 |
+
{
|
| 10964 |
+
"epoch": 13.01,
|
| 10965 |
+
"learning_rate": 7.218440224531231e-06,
|
| 10966 |
+
"loss": 2.7671,
|
| 10967 |
+
"step": 903500
|
| 10968 |
+
},
|
| 10969 |
+
{
|
| 10970 |
+
"epoch": 13.01,
|
| 10971 |
+
"learning_rate": 7.1665134827784964e-06,
|
| 10972 |
+
"loss": 2.7743,
|
| 10973 |
+
"step": 904000
|
| 10974 |
+
},
|
| 10975 |
+
{
|
| 10976 |
+
"epoch": 13.02,
|
| 10977 |
+
"learning_rate": 7.114586741025761e-06,
|
| 10978 |
+
"loss": 2.7692,
|
| 10979 |
+
"step": 904500
|
| 10980 |
+
},
|
| 10981 |
+
{
|
| 10982 |
+
"epoch": 13.03,
|
| 10983 |
+
"learning_rate": 7.062659999273026e-06,
|
| 10984 |
+
"loss": 2.7681,
|
| 10985 |
+
"step": 905000
|
| 10986 |
+
},
|
| 10987 |
+
{
|
| 10988 |
+
"epoch": 13.03,
|
| 10989 |
+
"learning_rate": 7.010733257520291e-06,
|
| 10990 |
+
"loss": 2.7708,
|
| 10991 |
+
"step": 905500
|
| 10992 |
+
},
|
| 10993 |
+
{
|
| 10994 |
+
"epoch": 13.04,
|
| 10995 |
+
"learning_rate": 6.958910369251061e-06,
|
| 10996 |
+
"loss": 2.7629,
|
| 10997 |
+
"step": 906000
|
| 10998 |
+
},
|
| 10999 |
+
{
|
| 11000 |
+
"epoch": 13.05,
|
| 11001 |
+
"learning_rate": 6.906983627498326e-06,
|
| 11002 |
+
"loss": 2.766,
|
| 11003 |
+
"step": 906500
|
| 11004 |
+
},
|
| 11005 |
+
{
|
| 11006 |
+
"epoch": 13.06,
|
| 11007 |
+
"learning_rate": 6.855056885745591e-06,
|
| 11008 |
+
"loss": 2.7656,
|
| 11009 |
+
"step": 907000
|
| 11010 |
+
},
|
| 11011 |
+
{
|
| 11012 |
+
"epoch": 13.06,
|
| 11013 |
+
"learning_rate": 6.8031301439928555e-06,
|
| 11014 |
+
"loss": 2.7697,
|
| 11015 |
+
"step": 907500
|
| 11016 |
+
},
|
| 11017 |
+
{
|
| 11018 |
+
"epoch": 13.07,
|
| 11019 |
+
"learning_rate": 6.7512034022401195e-06,
|
| 11020 |
+
"loss": 2.7668,
|
| 11021 |
+
"step": 908000
|
| 11022 |
+
},
|
| 11023 |
+
{
|
| 11024 |
+
"epoch": 13.08,
|
| 11025 |
+
"learning_rate": 6.6992766604873845e-06,
|
| 11026 |
+
"loss": 2.769,
|
| 11027 |
+
"step": 908500
|
| 11028 |
+
},
|
| 11029 |
+
{
|
| 11030 |
+
"epoch": 13.08,
|
| 11031 |
+
"learning_rate": 6.647453772218156e-06,
|
| 11032 |
+
"loss": 2.7645,
|
| 11033 |
+
"step": 909000
|
| 11034 |
+
},
|
| 11035 |
+
{
|
| 11036 |
+
"epoch": 13.09,
|
| 11037 |
+
"learning_rate": 6.595527030465419e-06,
|
| 11038 |
+
"loss": 2.7676,
|
| 11039 |
+
"step": 909500
|
| 11040 |
+
},
|
| 11041 |
+
{
|
| 11042 |
+
"epoch": 13.1,
|
| 11043 |
+
"learning_rate": 6.543600288712684e-06,
|
| 11044 |
+
"loss": 2.7681,
|
| 11045 |
+
"step": 910000
|
| 11046 |
+
},
|
| 11047 |
+
{
|
| 11048 |
+
"epoch": 13.11,
|
| 11049 |
+
"learning_rate": 6.491673546959949e-06,
|
| 11050 |
+
"loss": 2.7687,
|
| 11051 |
+
"step": 910500
|
| 11052 |
+
},
|
| 11053 |
+
{
|
| 11054 |
+
"epoch": 13.11,
|
| 11055 |
+
"learning_rate": 6.43985065869072e-06,
|
| 11056 |
+
"loss": 2.7705,
|
| 11057 |
+
"step": 911000
|
| 11058 |
+
},
|
| 11059 |
+
{
|
| 11060 |
+
"epoch": 13.12,
|
| 11061 |
+
"learning_rate": 6.387923916937983e-06,
|
| 11062 |
+
"loss": 2.7645,
|
| 11063 |
+
"step": 911500
|
| 11064 |
+
},
|
| 11065 |
+
{
|
| 11066 |
+
"epoch": 13.13,
|
| 11067 |
+
"learning_rate": 6.335997175185249e-06,
|
| 11068 |
+
"loss": 2.7686,
|
| 11069 |
+
"step": 912000
|
| 11070 |
+
},
|
| 11071 |
+
{
|
| 11072 |
+
"epoch": 13.13,
|
| 11073 |
+
"learning_rate": 6.284070433432514e-06,
|
| 11074 |
+
"loss": 2.7711,
|
| 11075 |
+
"step": 912500
|
| 11076 |
+
},
|
| 11077 |
+
{
|
| 11078 |
+
"epoch": 13.14,
|
| 11079 |
+
"learning_rate": 6.232143691679779e-06,
|
| 11080 |
+
"loss": 2.7682,
|
| 11081 |
+
"step": 913000
|
| 11082 |
+
},
|
| 11083 |
+
{
|
| 11084 |
+
"epoch": 13.15,
|
| 11085 |
+
"learning_rate": 6.1803208034105485e-06,
|
| 11086 |
+
"loss": 2.7709,
|
| 11087 |
+
"step": 913500
|
| 11088 |
+
},
|
| 11089 |
+
{
|
| 11090 |
+
"epoch": 13.16,
|
| 11091 |
+
"learning_rate": 6.128394061657813e-06,
|
| 11092 |
+
"loss": 2.7721,
|
| 11093 |
+
"step": 914000
|
| 11094 |
+
},
|
| 11095 |
+
{
|
| 11096 |
+
"epoch": 13.16,
|
| 11097 |
+
"learning_rate": 6.076467319905078e-06,
|
| 11098 |
+
"loss": 2.7701,
|
| 11099 |
+
"step": 914500
|
| 11100 |
+
},
|
| 11101 |
+
{
|
| 11102 |
+
"epoch": 13.17,
|
| 11103 |
+
"learning_rate": 6.024540578152342e-06,
|
| 11104 |
+
"loss": 2.766,
|
| 11105 |
+
"step": 915000
|
| 11106 |
+
},
|
| 11107 |
+
{
|
| 11108 |
+
"epoch": 13.18,
|
| 11109 |
+
"learning_rate": 5.972717689883113e-06,
|
| 11110 |
+
"loss": 2.7641,
|
| 11111 |
+
"step": 915500
|
| 11112 |
+
},
|
| 11113 |
+
{
|
| 11114 |
+
"epoch": 13.18,
|
| 11115 |
+
"learning_rate": 5.920790948130378e-06,
|
| 11116 |
+
"loss": 2.7665,
|
| 11117 |
+
"step": 916000
|
| 11118 |
+
},
|
| 11119 |
+
{
|
| 11120 |
+
"epoch": 13.19,
|
| 11121 |
+
"learning_rate": 5.868864206377643e-06,
|
| 11122 |
+
"loss": 2.768,
|
| 11123 |
+
"step": 916500
|
| 11124 |
+
},
|
| 11125 |
+
{
|
| 11126 |
+
"epoch": 13.2,
|
| 11127 |
+
"learning_rate": 5.8169374646249076e-06,
|
| 11128 |
+
"loss": 2.7703,
|
| 11129 |
+
"step": 917000
|
| 11130 |
+
},
|
| 11131 |
+
{
|
| 11132 |
+
"epoch": 13.21,
|
| 11133 |
+
"learning_rate": 5.7650107228721725e-06,
|
| 11134 |
+
"loss": 2.7685,
|
| 11135 |
+
"step": 917500
|
| 11136 |
+
},
|
| 11137 |
+
{
|
| 11138 |
+
"epoch": 13.21,
|
| 11139 |
+
"learning_rate": 5.7130839811194365e-06,
|
| 11140 |
+
"loss": 2.7681,
|
| 11141 |
+
"step": 918000
|
| 11142 |
+
},
|
| 11143 |
+
{
|
| 11144 |
+
"epoch": 13.22,
|
| 11145 |
+
"learning_rate": 5.661261092850207e-06,
|
| 11146 |
+
"loss": 2.7664,
|
| 11147 |
+
"step": 918500
|
| 11148 |
+
},
|
| 11149 |
+
{
|
| 11150 |
+
"epoch": 13.23,
|
| 11151 |
+
"learning_rate": 5.609334351097472e-06,
|
| 11152 |
+
"loss": 2.7648,
|
| 11153 |
+
"step": 919000
|
| 11154 |
+
},
|
| 11155 |
+
{
|
| 11156 |
+
"epoch": 13.24,
|
| 11157 |
+
"learning_rate": 5.557511462828242e-06,
|
| 11158 |
+
"loss": 2.7704,
|
| 11159 |
+
"step": 919500
|
| 11160 |
+
},
|
| 11161 |
+
{
|
| 11162 |
+
"epoch": 13.24,
|
| 11163 |
+
"learning_rate": 5.5055847210755064e-06,
|
| 11164 |
+
"loss": 2.7646,
|
| 11165 |
+
"step": 920000
|
| 11166 |
+
},
|
| 11167 |
+
{
|
| 11168 |
+
"epoch": 13.25,
|
| 11169 |
+
"learning_rate": 5.453657979322771e-06,
|
| 11170 |
+
"loss": 2.7614,
|
| 11171 |
+
"step": 920500
|
| 11172 |
+
},
|
| 11173 |
+
{
|
| 11174 |
+
"epoch": 13.26,
|
| 11175 |
+
"learning_rate": 5.401731237570037e-06,
|
| 11176 |
+
"loss": 2.7662,
|
| 11177 |
+
"step": 921000
|
| 11178 |
+
},
|
| 11179 |
+
{
|
| 11180 |
+
"epoch": 13.26,
|
| 11181 |
+
"learning_rate": 5.349804495817301e-06,
|
| 11182 |
+
"loss": 2.7648,
|
| 11183 |
+
"step": 921500
|
| 11184 |
+
},
|
| 11185 |
+
{
|
| 11186 |
+
"epoch": 13.27,
|
| 11187 |
+
"learning_rate": 5.297877754064566e-06,
|
| 11188 |
+
"loss": 2.7652,
|
| 11189 |
+
"step": 922000
|
| 11190 |
+
},
|
| 11191 |
+
{
|
| 11192 |
+
"epoch": 13.28,
|
| 11193 |
+
"learning_rate": 5.245951012311831e-06,
|
| 11194 |
+
"loss": 2.7639,
|
| 11195 |
+
"step": 922500
|
| 11196 |
+
},
|
| 11197 |
+
{
|
| 11198 |
+
"epoch": 13.29,
|
| 11199 |
+
"learning_rate": 5.194024270559095e-06,
|
| 11200 |
+
"loss": 2.7601,
|
| 11201 |
+
"step": 923000
|
| 11202 |
+
},
|
| 11203 |
+
{
|
| 11204 |
+
"epoch": 13.29,
|
| 11205 |
+
"learning_rate": 5.142097528806361e-06,
|
| 11206 |
+
"loss": 2.7665,
|
| 11207 |
+
"step": 923500
|
| 11208 |
+
},
|
| 11209 |
+
{
|
| 11210 |
+
"epoch": 13.3,
|
| 11211 |
+
"learning_rate": 5.090170787053625e-06,
|
| 11212 |
+
"loss": 2.7682,
|
| 11213 |
+
"step": 924000
|
| 11214 |
+
},
|
| 11215 |
+
{
|
| 11216 |
+
"epoch": 13.31,
|
| 11217 |
+
"learning_rate": 5.038347898784395e-06,
|
| 11218 |
+
"loss": 2.7652,
|
| 11219 |
+
"step": 924500
|
| 11220 |
+
},
|
| 11221 |
+
{
|
| 11222 |
+
"epoch": 13.31,
|
| 11223 |
+
"learning_rate": 4.98642115703166e-06,
|
| 11224 |
+
"loss": 2.7607,
|
| 11225 |
+
"step": 925000
|
| 11226 |
+
},
|
| 11227 |
+
{
|
| 11228 |
+
"epoch": 13.32,
|
| 11229 |
+
"learning_rate": 4.934494415278925e-06,
|
| 11230 |
+
"loss": 2.7694,
|
| 11231 |
+
"step": 925500
|
| 11232 |
+
},
|
| 11233 |
+
{
|
| 11234 |
+
"epoch": 13.33,
|
| 11235 |
+
"learning_rate": 4.882567673526189e-06,
|
| 11236 |
+
"loss": 2.7618,
|
| 11237 |
+
"step": 926000
|
| 11238 |
+
},
|
| 11239 |
+
{
|
| 11240 |
+
"epoch": 13.34,
|
| 11241 |
+
"learning_rate": 4.830640931773454e-06,
|
| 11242 |
+
"loss": 2.7679,
|
| 11243 |
+
"step": 926500
|
| 11244 |
+
},
|
| 11245 |
+
{
|
| 11246 |
+
"epoch": 13.34,
|
| 11247 |
+
"learning_rate": 4.778714190020719e-06,
|
| 11248 |
+
"loss": 2.7704,
|
| 11249 |
+
"step": 927000
|
| 11250 |
+
},
|
| 11251 |
+
{
|
| 11252 |
+
"epoch": 13.35,
|
| 11253 |
+
"learning_rate": 4.7268913017514894e-06,
|
| 11254 |
+
"loss": 2.7645,
|
| 11255 |
+
"step": 927500
|
| 11256 |
+
},
|
| 11257 |
+
{
|
| 11258 |
+
"epoch": 13.36,
|
| 11259 |
+
"learning_rate": 4.674964559998754e-06,
|
| 11260 |
+
"loss": 2.7672,
|
| 11261 |
+
"step": 928000
|
| 11262 |
+
},
|
| 11263 |
+
{
|
| 11264 |
+
"epoch": 13.36,
|
| 11265 |
+
"learning_rate": 4.623037818246019e-06,
|
| 11266 |
+
"loss": 2.7705,
|
| 11267 |
+
"step": 928500
|
| 11268 |
+
},
|
| 11269 |
+
{
|
| 11270 |
+
"epoch": 13.37,
|
| 11271 |
+
"learning_rate": 4.571111076493283e-06,
|
| 11272 |
+
"loss": 2.758,
|
| 11273 |
+
"step": 929000
|
| 11274 |
+
},
|
| 11275 |
+
{
|
| 11276 |
+
"epoch": 13.38,
|
| 11277 |
+
"learning_rate": 4.519184334740548e-06,
|
| 11278 |
+
"loss": 2.7695,
|
| 11279 |
+
"step": 929500
|
| 11280 |
+
},
|
| 11281 |
+
{
|
| 11282 |
+
"epoch": 13.39,
|
| 11283 |
+
"learning_rate": 4.467361446471319e-06,
|
| 11284 |
+
"loss": 2.768,
|
| 11285 |
+
"step": 930000
|
| 11286 |
+
},
|
| 11287 |
+
{
|
| 11288 |
+
"epoch": 13.39,
|
| 11289 |
+
"learning_rate": 4.415434704718583e-06,
|
| 11290 |
+
"loss": 2.7683,
|
| 11291 |
+
"step": 930500
|
| 11292 |
+
},
|
| 11293 |
+
{
|
| 11294 |
+
"epoch": 13.4,
|
| 11295 |
+
"learning_rate": 4.363507962965848e-06,
|
| 11296 |
+
"loss": 2.7695,
|
| 11297 |
+
"step": 931000
|
| 11298 |
+
},
|
| 11299 |
+
{
|
| 11300 |
+
"epoch": 13.41,
|
| 11301 |
+
"learning_rate": 4.311581221213113e-06,
|
| 11302 |
+
"loss": 2.7767,
|
| 11303 |
+
"step": 931500
|
| 11304 |
+
},
|
| 11305 |
+
{
|
| 11306 |
+
"epoch": 13.42,
|
| 11307 |
+
"learning_rate": 4.2596544794603774e-06,
|
| 11308 |
+
"loss": 2.7615,
|
| 11309 |
+
"step": 932000
|
| 11310 |
+
},
|
| 11311 |
+
{
|
| 11312 |
+
"epoch": 13.42,
|
| 11313 |
+
"learning_rate": 4.207727737707642e-06,
|
| 11314 |
+
"loss": 2.761,
|
| 11315 |
+
"step": 932500
|
| 11316 |
+
},
|
| 11317 |
+
{
|
| 11318 |
+
"epoch": 13.43,
|
| 11319 |
+
"learning_rate": 4.155800995954906e-06,
|
| 11320 |
+
"loss": 2.767,
|
| 11321 |
+
"step": 933000
|
| 11322 |
+
},
|
| 11323 |
+
{
|
| 11324 |
+
"epoch": 13.44,
|
| 11325 |
+
"learning_rate": 4.103978107685677e-06,
|
| 11326 |
+
"loss": 2.7671,
|
| 11327 |
+
"step": 933500
|
| 11328 |
+
},
|
| 11329 |
+
{
|
| 11330 |
+
"epoch": 13.44,
|
| 11331 |
+
"learning_rate": 4.052051365932942e-06,
|
| 11332 |
+
"loss": 2.7686,
|
| 11333 |
+
"step": 934000
|
| 11334 |
+
},
|
| 11335 |
+
{
|
| 11336 |
+
"epoch": 13.45,
|
| 11337 |
+
"learning_rate": 4.000124624180207e-06,
|
| 11338 |
+
"loss": 2.7656,
|
| 11339 |
+
"step": 934500
|
| 11340 |
+
},
|
| 11341 |
+
{
|
| 11342 |
+
"epoch": 13.46,
|
| 11343 |
+
"learning_rate": 3.948197882427472e-06,
|
| 11344 |
+
"loss": 2.7653,
|
| 11345 |
+
"step": 935000
|
| 11346 |
+
},
|
| 11347 |
+
{
|
| 11348 |
+
"epoch": 13.47,
|
| 11349 |
+
"learning_rate": 3.8962711406747365e-06,
|
| 11350 |
+
"loss": 2.7629,
|
| 11351 |
+
"step": 935500
|
| 11352 |
+
},
|
| 11353 |
+
{
|
| 11354 |
+
"epoch": 13.47,
|
| 11355 |
+
"learning_rate": 3.844448252405507e-06,
|
| 11356 |
+
"loss": 2.7645,
|
| 11357 |
+
"step": 936000
|
| 11358 |
+
},
|
| 11359 |
+
{
|
| 11360 |
+
"epoch": 13.48,
|
| 11361 |
+
"learning_rate": 3.792521510652771e-06,
|
| 11362 |
+
"loss": 2.7672,
|
| 11363 |
+
"step": 936500
|
| 11364 |
+
},
|
| 11365 |
+
{
|
| 11366 |
+
"epoch": 13.49,
|
| 11367 |
+
"learning_rate": 3.740594768900036e-06,
|
| 11368 |
+
"loss": 2.7698,
|
| 11369 |
+
"step": 937000
|
| 11370 |
+
},
|
| 11371 |
+
{
|
| 11372 |
+
"epoch": 13.49,
|
| 11373 |
+
"learning_rate": 3.6886680271473013e-06,
|
| 11374 |
+
"loss": 2.7685,
|
| 11375 |
+
"step": 937500
|
| 11376 |
+
},
|
| 11377 |
+
{
|
| 11378 |
+
"epoch": 13.5,
|
| 11379 |
+
"learning_rate": 3.6367412853945653e-06,
|
| 11380 |
+
"loss": 2.7602,
|
| 11381 |
+
"step": 938000
|
| 11382 |
+
},
|
| 11383 |
+
{
|
| 11384 |
+
"epoch": 13.51,
|
| 11385 |
+
"learning_rate": 3.5849183971253358e-06,
|
| 11386 |
+
"loss": 2.7635,
|
| 11387 |
+
"step": 938500
|
| 11388 |
+
},
|
| 11389 |
+
{
|
| 11390 |
+
"epoch": 13.52,
|
| 11391 |
+
"learning_rate": 3.5329916553726007e-06,
|
| 11392 |
+
"loss": 2.7602,
|
| 11393 |
+
"step": 939000
|
| 11394 |
+
},
|
| 11395 |
+
{
|
| 11396 |
+
"epoch": 13.52,
|
| 11397 |
+
"learning_rate": 3.481064913619865e-06,
|
| 11398 |
+
"loss": 2.7568,
|
| 11399 |
+
"step": 939500
|
| 11400 |
+
},
|
| 11401 |
+
{
|
| 11402 |
+
"epoch": 13.53,
|
| 11403 |
+
"learning_rate": 3.42913817186713e-06,
|
| 11404 |
+
"loss": 2.7642,
|
| 11405 |
+
"step": 940000
|
| 11406 |
+
},
|
| 11407 |
+
{
|
| 11408 |
+
"epoch": 13.54,
|
| 11409 |
+
"learning_rate": 3.377211430114395e-06,
|
| 11410 |
+
"loss": 2.7657,
|
| 11411 |
+
"step": 940500
|
| 11412 |
+
},
|
| 11413 |
+
{
|
| 11414 |
+
"epoch": 13.54,
|
| 11415 |
+
"learning_rate": 3.3252846883616595e-06,
|
| 11416 |
+
"loss": 2.7644,
|
| 11417 |
+
"step": 941000
|
| 11418 |
+
},
|
| 11419 |
+
{
|
| 11420 |
+
"epoch": 13.55,
|
| 11421 |
+
"learning_rate": 3.2734618000924295e-06,
|
| 11422 |
+
"loss": 2.765,
|
| 11423 |
+
"step": 941500
|
| 11424 |
+
},
|
| 11425 |
+
{
|
| 11426 |
+
"epoch": 13.56,
|
| 11427 |
+
"learning_rate": 3.221535058339695e-06,
|
| 11428 |
+
"loss": 2.7656,
|
| 11429 |
+
"step": 942000
|
| 11430 |
+
},
|
| 11431 |
+
{
|
| 11432 |
+
"epoch": 13.57,
|
| 11433 |
+
"learning_rate": 3.169608316586959e-06,
|
| 11434 |
+
"loss": 2.7593,
|
| 11435 |
+
"step": 942500
|
| 11436 |
+
},
|
| 11437 |
+
{
|
| 11438 |
+
"epoch": 13.57,
|
| 11439 |
+
"learning_rate": 3.1176815748342242e-06,
|
| 11440 |
+
"loss": 2.7662,
|
| 11441 |
+
"step": 943000
|
| 11442 |
+
},
|
| 11443 |
+
{
|
| 11444 |
+
"epoch": 13.58,
|
| 11445 |
+
"learning_rate": 3.0657548330814887e-06,
|
| 11446 |
+
"loss": 2.7595,
|
| 11447 |
+
"step": 943500
|
| 11448 |
+
},
|
| 11449 |
+
{
|
| 11450 |
+
"epoch": 13.59,
|
| 11451 |
+
"learning_rate": 3.013931944812259e-06,
|
| 11452 |
+
"loss": 2.7605,
|
| 11453 |
+
"step": 944000
|
| 11454 |
+
},
|
| 11455 |
+
{
|
| 11456 |
+
"epoch": 13.6,
|
| 11457 |
+
"learning_rate": 2.9620052030595237e-06,
|
| 11458 |
+
"loss": 2.7638,
|
| 11459 |
+
"step": 944500
|
| 11460 |
+
},
|
| 11461 |
+
{
|
| 11462 |
+
"epoch": 13.6,
|
| 11463 |
+
"learning_rate": 2.9100784613067886e-06,
|
| 11464 |
+
"loss": 2.7634,
|
| 11465 |
+
"step": 945000
|
| 11466 |
+
},
|
| 11467 |
+
{
|
| 11468 |
+
"epoch": 13.61,
|
| 11469 |
+
"learning_rate": 2.8581517195540535e-06,
|
| 11470 |
+
"loss": 2.7629,
|
| 11471 |
+
"step": 945500
|
| 11472 |
+
},
|
| 11473 |
+
{
|
| 11474 |
+
"epoch": 13.62,
|
| 11475 |
+
"learning_rate": 2.806224977801318e-06,
|
| 11476 |
+
"loss": 2.7662,
|
| 11477 |
+
"step": 946000
|
| 11478 |
+
},
|
| 11479 |
+
{
|
| 11480 |
+
"epoch": 13.62,
|
| 11481 |
+
"learning_rate": 2.754298236048583e-06,
|
| 11482 |
+
"loss": 2.7636,
|
| 11483 |
+
"step": 946500
|
| 11484 |
+
},
|
| 11485 |
+
{
|
| 11486 |
+
"epoch": 13.63,
|
| 11487 |
+
"learning_rate": 2.7023714942958473e-06,
|
| 11488 |
+
"loss": 2.7625,
|
| 11489 |
+
"step": 947000
|
| 11490 |
+
},
|
| 11491 |
+
{
|
| 11492 |
+
"epoch": 13.64,
|
| 11493 |
+
"learning_rate": 2.650548606026618e-06,
|
| 11494 |
+
"loss": 2.7606,
|
| 11495 |
+
"step": 947500
|
| 11496 |
+
},
|
| 11497 |
+
{
|
| 11498 |
+
"epoch": 13.65,
|
| 11499 |
+
"learning_rate": 2.5986218642738823e-06,
|
| 11500 |
+
"loss": 2.7685,
|
| 11501 |
+
"step": 948000
|
| 11502 |
+
},
|
| 11503 |
+
{
|
| 11504 |
+
"epoch": 13.65,
|
| 11505 |
+
"learning_rate": 2.546695122521147e-06,
|
| 11506 |
+
"loss": 2.7667,
|
| 11507 |
+
"step": 948500
|
| 11508 |
+
},
|
| 11509 |
+
{
|
| 11510 |
+
"epoch": 13.66,
|
| 11511 |
+
"learning_rate": 2.494768380768412e-06,
|
| 11512 |
+
"loss": 2.7613,
|
| 11513 |
+
"step": 949000
|
| 11514 |
+
},
|
| 11515 |
+
{
|
| 11516 |
+
"epoch": 13.67,
|
| 11517 |
+
"learning_rate": 2.4428416390156766e-06,
|
| 11518 |
+
"loss": 2.7675,
|
| 11519 |
+
"step": 949500
|
| 11520 |
+
},
|
| 11521 |
+
{
|
| 11522 |
+
"epoch": 13.67,
|
| 11523 |
+
"learning_rate": 2.391018750746447e-06,
|
| 11524 |
+
"loss": 2.766,
|
| 11525 |
+
"step": 950000
|
| 11526 |
+
},
|
| 11527 |
+
{
|
| 11528 |
+
"epoch": 13.68,
|
| 11529 |
+
"learning_rate": 2.3390920089937115e-06,
|
| 11530 |
+
"loss": 2.7623,
|
| 11531 |
+
"step": 950500
|
| 11532 |
+
},
|
| 11533 |
+
{
|
| 11534 |
+
"epoch": 13.69,
|
| 11535 |
+
"learning_rate": 2.287165267240977e-06,
|
| 11536 |
+
"loss": 2.7623,
|
| 11537 |
+
"step": 951000
|
| 11538 |
+
},
|
| 11539 |
+
{
|
| 11540 |
+
"epoch": 13.7,
|
| 11541 |
+
"learning_rate": 2.2352385254882413e-06,
|
| 11542 |
+
"loss": 2.7631,
|
| 11543 |
+
"step": 951500
|
| 11544 |
+
},
|
| 11545 |
+
{
|
| 11546 |
+
"epoch": 13.7,
|
| 11547 |
+
"learning_rate": 2.1833117837355062e-06,
|
| 11548 |
+
"loss": 2.7559,
|
| 11549 |
+
"step": 952000
|
| 11550 |
+
},
|
| 11551 |
+
{
|
| 11552 |
+
"epoch": 13.71,
|
| 11553 |
+
"learning_rate": 2.1313850419827707e-06,
|
| 11554 |
+
"loss": 2.7593,
|
| 11555 |
+
"step": 952500
|
| 11556 |
+
},
|
| 11557 |
+
{
|
| 11558 |
+
"epoch": 13.72,
|
| 11559 |
+
"learning_rate": 2.0794583002300356e-06,
|
| 11560 |
+
"loss": 2.7603,
|
| 11561 |
+
"step": 953000
|
| 11562 |
+
},
|
| 11563 |
+
{
|
| 11564 |
+
"epoch": 13.72,
|
| 11565 |
+
"learning_rate": 2.0276354119608057e-06,
|
| 11566 |
+
"loss": 2.7611,
|
| 11567 |
+
"step": 953500
|
| 11568 |
+
},
|
| 11569 |
+
{
|
| 11570 |
+
"epoch": 13.73,
|
| 11571 |
+
"learning_rate": 1.9757086702080706e-06,
|
| 11572 |
+
"loss": 2.7659,
|
| 11573 |
+
"step": 954000
|
| 11574 |
+
},
|
| 11575 |
+
{
|
| 11576 |
+
"epoch": 13.74,
|
| 11577 |
+
"learning_rate": 1.9237819284553355e-06,
|
| 11578 |
+
"loss": 2.7671,
|
| 11579 |
+
"step": 954500
|
| 11580 |
+
},
|
| 11581 |
+
{
|
| 11582 |
+
"epoch": 13.75,
|
| 11583 |
+
"learning_rate": 1.8718551867026e-06,
|
| 11584 |
+
"loss": 2.7695,
|
| 11585 |
+
"step": 955000
|
| 11586 |
+
},
|
| 11587 |
+
{
|
| 11588 |
+
"epoch": 13.75,
|
| 11589 |
+
"learning_rate": 1.8199284449498646e-06,
|
| 11590 |
+
"loss": 2.7619,
|
| 11591 |
+
"step": 955500
|
| 11592 |
+
},
|
| 11593 |
+
{
|
| 11594 |
+
"epoch": 13.76,
|
| 11595 |
+
"learning_rate": 1.7681055566806351e-06,
|
| 11596 |
+
"loss": 2.765,
|
| 11597 |
+
"step": 956000
|
| 11598 |
+
},
|
| 11599 |
+
{
|
| 11600 |
+
"epoch": 13.77,
|
| 11601 |
+
"learning_rate": 1.7161788149278996e-06,
|
| 11602 |
+
"loss": 2.7589,
|
| 11603 |
+
"step": 956500
|
| 11604 |
+
},
|
| 11605 |
+
{
|
| 11606 |
+
"epoch": 13.78,
|
| 11607 |
+
"learning_rate": 1.6642520731751647e-06,
|
| 11608 |
+
"loss": 2.7621,
|
| 11609 |
+
"step": 957000
|
| 11610 |
+
},
|
| 11611 |
+
{
|
| 11612 |
+
"epoch": 13.78,
|
| 11613 |
+
"learning_rate": 1.6123253314224294e-06,
|
| 11614 |
+
"loss": 2.7618,
|
| 11615 |
+
"step": 957500
|
| 11616 |
+
},
|
| 11617 |
+
{
|
| 11618 |
+
"epoch": 13.79,
|
| 11619 |
+
"learning_rate": 1.560398589669694e-06,
|
| 11620 |
+
"loss": 2.7611,
|
| 11621 |
+
"step": 958000
|
| 11622 |
+
},
|
| 11623 |
+
{
|
| 11624 |
+
"epoch": 13.8,
|
| 11625 |
+
"learning_rate": 1.5084718479169588e-06,
|
| 11626 |
+
"loss": 2.7601,
|
| 11627 |
+
"step": 958500
|
| 11628 |
+
},
|
| 11629 |
+
{
|
| 11630 |
+
"epoch": 13.8,
|
| 11631 |
+
"learning_rate": 1.456648959647729e-06,
|
| 11632 |
+
"loss": 2.7613,
|
| 11633 |
+
"step": 959000
|
| 11634 |
+
},
|
| 11635 |
+
{
|
| 11636 |
+
"epoch": 13.81,
|
| 11637 |
+
"learning_rate": 1.404722217894994e-06,
|
| 11638 |
+
"loss": 2.76,
|
| 11639 |
+
"step": 959500
|
| 11640 |
+
},
|
| 11641 |
+
{
|
| 11642 |
+
"epoch": 13.82,
|
| 11643 |
+
"learning_rate": 1.3527954761422584e-06,
|
| 11644 |
+
"loss": 2.7567,
|
| 11645 |
+
"step": 960000
|
| 11646 |
+
},
|
| 11647 |
+
{
|
| 11648 |
+
"epoch": 13.83,
|
| 11649 |
+
"learning_rate": 1.3008687343895233e-06,
|
| 11650 |
+
"loss": 2.7568,
|
| 11651 |
+
"step": 960500
|
| 11652 |
+
},
|
| 11653 |
+
{
|
| 11654 |
+
"epoch": 13.83,
|
| 11655 |
+
"learning_rate": 1.248941992636788e-06,
|
| 11656 |
+
"loss": 2.7613,
|
| 11657 |
+
"step": 961000
|
| 11658 |
+
},
|
| 11659 |
+
{
|
| 11660 |
+
"epoch": 13.84,
|
| 11661 |
+
"learning_rate": 1.197015250884053e-06,
|
| 11662 |
+
"loss": 2.7679,
|
| 11663 |
+
"step": 961500
|
| 11664 |
+
},
|
| 11665 |
+
{
|
| 11666 |
+
"epoch": 13.85,
|
| 11667 |
+
"learning_rate": 1.1450885091313176e-06,
|
| 11668 |
+
"loss": 2.7643,
|
| 11669 |
+
"step": 962000
|
| 11670 |
+
},
|
| 11671 |
+
{
|
| 11672 |
+
"epoch": 13.85,
|
| 11673 |
+
"learning_rate": 1.0931617673785823e-06,
|
| 11674 |
+
"loss": 2.7615,
|
| 11675 |
+
"step": 962500
|
| 11676 |
+
},
|
| 11677 |
+
{
|
| 11678 |
+
"epoch": 13.86,
|
| 11679 |
+
"learning_rate": 1.0413388791093526e-06,
|
| 11680 |
+
"loss": 2.7609,
|
| 11681 |
+
"step": 963000
|
| 11682 |
+
},
|
| 11683 |
+
{
|
| 11684 |
+
"epoch": 13.87,
|
| 11685 |
+
"learning_rate": 9.894121373566175e-07,
|
| 11686 |
+
"loss": 2.7618,
|
| 11687 |
+
"step": 963500
|
| 11688 |
+
},
|
| 11689 |
+
{
|
| 11690 |
+
"epoch": 13.88,
|
| 11691 |
+
"learning_rate": 9.375892490873875e-07,
|
| 11692 |
+
"loss": 2.7576,
|
| 11693 |
+
"step": 964000
|
| 11694 |
+
},
|
| 11695 |
+
{
|
| 11696 |
+
"epoch": 13.88,
|
| 11697 |
+
"learning_rate": 8.856625073346523e-07,
|
| 11698 |
+
"loss": 2.7619,
|
| 11699 |
+
"step": 964500
|
| 11700 |
+
},
|
| 11701 |
+
{
|
| 11702 |
+
"epoch": 13.89,
|
| 11703 |
+
"learning_rate": 8.33735765581917e-07,
|
| 11704 |
+
"loss": 2.7564,
|
| 11705 |
+
"step": 965000
|
| 11706 |
+
},
|
| 11707 |
+
{
|
| 11708 |
+
"epoch": 13.9,
|
| 11709 |
+
"learning_rate": 7.818090238291818e-07,
|
| 11710 |
+
"loss": 2.7631,
|
| 11711 |
+
"step": 965500
|
| 11712 |
+
},
|
| 11713 |
+
{
|
| 11714 |
+
"epoch": 13.9,
|
| 11715 |
+
"learning_rate": 7.298822820764466e-07,
|
| 11716 |
+
"loss": 2.7709,
|
| 11717 |
+
"step": 966000
|
| 11718 |
+
},
|
| 11719 |
+
{
|
| 11720 |
+
"epoch": 13.91,
|
| 11721 |
+
"learning_rate": 6.779555403237114e-07,
|
| 11722 |
+
"loss": 2.7624,
|
| 11723 |
+
"step": 966500
|
| 11724 |
+
},
|
| 11725 |
+
{
|
| 11726 |
+
"epoch": 13.92,
|
| 11727 |
+
"learning_rate": 6.261326520544816e-07,
|
| 11728 |
+
"loss": 2.7607,
|
| 11729 |
+
"step": 967000
|
| 11730 |
+
},
|
| 11731 |
+
{
|
| 11732 |
+
"epoch": 13.93,
|
| 11733 |
+
"learning_rate": 5.742059103017464e-07,
|
| 11734 |
+
"loss": 2.7559,
|
| 11735 |
+
"step": 967500
|
| 11736 |
+
},
|
| 11737 |
+
{
|
| 11738 |
+
"epoch": 13.93,
|
| 11739 |
+
"learning_rate": 5.222791685490112e-07,
|
| 11740 |
+
"loss": 2.7601,
|
| 11741 |
+
"step": 968000
|
| 11742 |
+
},
|
| 11743 |
+
{
|
| 11744 |
+
"epoch": 13.94,
|
| 11745 |
+
"learning_rate": 4.7035242679627586e-07,
|
| 11746 |
+
"loss": 2.7625,
|
| 11747 |
+
"step": 968500
|
| 11748 |
+
},
|
| 11749 |
+
{
|
| 11750 |
+
"epoch": 13.95,
|
| 11751 |
+
"learning_rate": 4.184256850435406e-07,
|
| 11752 |
+
"loss": 2.7585,
|
| 11753 |
+
"step": 969000
|
| 11754 |
+
},
|
| 11755 |
+
{
|
| 11756 |
+
"epoch": 13.96,
|
| 11757 |
+
"learning_rate": 3.666027967743108e-07,
|
| 11758 |
+
"loss": 2.7573,
|
| 11759 |
+
"step": 969500
|
| 11760 |
+
},
|
| 11761 |
+
{
|
| 11762 |
+
"epoch": 13.96,
|
| 11763 |
+
"learning_rate": 3.1467605502157556e-07,
|
| 11764 |
+
"loss": 2.7586,
|
| 11765 |
+
"step": 970000
|
| 11766 |
+
},
|
| 11767 |
+
{
|
| 11768 |
+
"epoch": 13.97,
|
| 11769 |
+
"learning_rate": 2.627493132688403e-07,
|
| 11770 |
+
"loss": 2.7611,
|
| 11771 |
+
"step": 970500
|
| 11772 |
+
},
|
| 11773 |
+
{
|
| 11774 |
+
"epoch": 13.98,
|
| 11775 |
+
"learning_rate": 2.1082257151610508e-07,
|
| 11776 |
+
"loss": 2.7612,
|
| 11777 |
+
"step": 971000
|
| 11778 |
+
},
|
| 11779 |
+
{
|
| 11780 |
+
"epoch": 13.98,
|
| 11781 |
+
"learning_rate": 1.5899968324687532e-07,
|
| 11782 |
+
"loss": 2.7539,
|
| 11783 |
+
"step": 971500
|
| 11784 |
+
},
|
| 11785 |
+
{
|
| 11786 |
+
"epoch": 13.99,
|
| 11787 |
+
"learning_rate": 1.0707294149414008e-07,
|
| 11788 |
+
"loss": 2.7624,
|
| 11789 |
+
"step": 972000
|
| 11790 |
+
},
|
| 11791 |
+
{
|
| 11792 |
+
"epoch": 14.0,
|
| 11793 |
+
"learning_rate": 5.5146199741404825e-08,
|
| 11794 |
+
"loss": 2.761,
|
| 11795 |
+
"step": 972500
|
| 11796 |
+
},
|
| 11797 |
+
{
|
| 11798 |
+
"epoch": 14.0,
|
| 11799 |
+
"eval_accuracy": 0.509524975254837,
|
| 11800 |
+
"eval_loss": 2.5919222831726074,
|
| 11801 |
+
"eval_runtime": 555.1881,
|
| 11802 |
+
"eval_samples_per_second": 970.729,
|
| 11803 |
+
"eval_steps_per_second": 40.448,
|
| 11804 |
+
"step": 972622
|
| 11805 |
}
|
| 11806 |
],
|
| 11807 |
"max_steps": 972622,
|
| 11808 |
"num_train_epochs": 14,
|
| 11809 |
+
"total_flos": 5.9815806927215e+18,
|
| 11810 |
"trial_name": null,
|
| 11811 |
"trial_params": null
|
| 11812 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 118242180
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae1fe310f1f9e883c23d6725718feca058676978792d4f4de4c8dbbe2df9c17e
|
| 3 |
size 118242180
|
runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3beebb212def5ed2a93cacef0a8f337665578f45993672c54eb6b8189a7b4fc
|
| 3 |
+
size 319132
|