Training in progress, step 6500, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13934748
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b70b1bdd1598615c49c4b9a7faeeaa85e0df1ab80935c4de9703e337cbef5419
|
| 3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13999412
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc84cdd38d4fb81c57c92e318089a0050c59636f80d52d59c7e95dfd9fd62580
|
| 3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4712250438ce35119c47f3071be3ca85a4fce51b421eda9263e5ccdc56ad810
|
| 3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03a7d0375d4dc32e11bcee4d7faf50e1efa9d4c215c6763c2e4a46a246814940
|
| 3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f97937888bf353d4425445e26e6749a80bf045549b1996cc08838b4dfb4b8dc4
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11d0834123efa7f652e2e631a76ccc6e13c613f625cb331ed1e2b81641ebca01
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:352b2435cc0fdbc839b93fecae50d3830aa0717204cfab826aa5127ee89d2407
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58cce52d813acc463fc99594977081fcbdb55dfd090284d6dbc8cb7c0ca23dd0
|
| 3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89592922c71a0801bf0d6fdc601852fa0221a03b1ab5fb935185066acc67448f
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc83c14767c41315dc8ec259110c74b59ff4daddfa2add8ceb7d6ecfcf304840
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9011b070f191d0e0ac40350bb8e4c21dd15e660927e0930b9cd365a37b434167
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd7e984441e517b75f1d23d418db3472b205bd6171ca12f9c999f36bc527e641
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c58e283a30a20e60ecca74baeb4de711e3041934465bd25e1ee1e0167c92157
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -48659,6 +48659,766 @@
|
|
| 48659 |
"eval_samples_per_second": 5.929,
|
| 48660 |
"eval_steps_per_second": 0.204,
|
| 48661 |
"step": 6400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48662 |
}
|
| 48663 |
],
|
| 48664 |
"logging_steps": 1,
|
|
@@ -48678,7 +49438,7 @@
|
|
| 48678 |
"attributes": {}
|
| 48679 |
}
|
| 48680 |
},
|
| 48681 |
-
"total_flos":
|
| 48682 |
"train_batch_size": 8,
|
| 48683 |
"trial_name": null,
|
| 48684 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.9590556990040575,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 6500,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 48659 |
"eval_samples_per_second": 5.929,
|
| 48660 |
"eval_steps_per_second": 0.204,
|
| 48661 |
"step": 6400
|
| 48662 |
+
},
|
| 48663 |
+
{
|
| 48664 |
+
"epoch": 0.9444485429730727,
|
| 48665 |
+
"grad_norm": 5.365920543670654,
|
| 48666 |
+
"learning_rate": 1.8696924803641825e-07,
|
| 48667 |
+
"loss": 0.0539,
|
| 48668 |
+
"step": 6401
|
| 48669 |
+
},
|
| 48670 |
+
{
|
| 48671 |
+
"epoch": 0.9445960900036887,
|
| 48672 |
+
"grad_norm": 1.1807736158370972,
|
| 48673 |
+
"learning_rate": 1.8597914335757085e-07,
|
| 48674 |
+
"loss": 0.0485,
|
| 48675 |
+
"step": 6402
|
| 48676 |
+
},
|
| 48677 |
+
{
|
| 48678 |
+
"epoch": 0.9447436370343046,
|
| 48679 |
+
"grad_norm": 1.9345782995224,
|
| 48680 |
+
"learning_rate": 1.8499164260956548e-07,
|
| 48681 |
+
"loss": 0.025,
|
| 48682 |
+
"step": 6403
|
| 48683 |
+
},
|
| 48684 |
+
{
|
| 48685 |
+
"epoch": 0.9448911840649207,
|
| 48686 |
+
"grad_norm": 2.943807363510132,
|
| 48687 |
+
"learning_rate": 1.840067460544137e-07,
|
| 48688 |
+
"loss": 0.0545,
|
| 48689 |
+
"step": 6404
|
| 48690 |
+
},
|
| 48691 |
+
{
|
| 48692 |
+
"epoch": 0.9450387310955367,
|
| 48693 |
+
"grad_norm": 1.9922869205474854,
|
| 48694 |
+
"learning_rate": 1.830244539534376e-07,
|
| 48695 |
+
"loss": 0.0358,
|
| 48696 |
+
"step": 6405
|
| 48697 |
+
},
|
| 48698 |
+
{
|
| 48699 |
+
"epoch": 0.9451862781261527,
|
| 48700 |
+
"grad_norm": 2.3645715713500977,
|
| 48701 |
+
"learning_rate": 1.820447665672631e-07,
|
| 48702 |
+
"loss": 0.0791,
|
| 48703 |
+
"step": 6406
|
| 48704 |
+
},
|
| 48705 |
+
{
|
| 48706 |
+
"epoch": 0.9453338251567687,
|
| 48707 |
+
"grad_norm": 2.1656434535980225,
|
| 48708 |
+
"learning_rate": 1.810676841558301e-07,
|
| 48709 |
+
"loss": 0.0472,
|
| 48710 |
+
"step": 6407
|
| 48711 |
+
},
|
| 48712 |
+
{
|
| 48713 |
+
"epoch": 0.9454813721873847,
|
| 48714 |
+
"grad_norm": 1.5175650119781494,
|
| 48715 |
+
"learning_rate": 1.8009320697838672e-07,
|
| 48716 |
+
"loss": 0.024,
|
| 48717 |
+
"step": 6408
|
| 48718 |
+
},
|
| 48719 |
+
{
|
| 48720 |
+
"epoch": 0.9456289192180007,
|
| 48721 |
+
"grad_norm": 1.9532819986343384,
|
| 48722 |
+
"learning_rate": 1.791213352934851e-07,
|
| 48723 |
+
"loss": 0.045,
|
| 48724 |
+
"step": 6409
|
| 48725 |
+
},
|
| 48726 |
+
{
|
| 48727 |
+
"epoch": 0.9457764662486168,
|
| 48728 |
+
"grad_norm": 1.9861067533493042,
|
| 48729 |
+
"learning_rate": 1.7815206935899332e-07,
|
| 48730 |
+
"loss": 0.0689,
|
| 48731 |
+
"step": 6410
|
| 48732 |
+
},
|
| 48733 |
+
{
|
| 48734 |
+
"epoch": 0.9459240132792327,
|
| 48735 |
+
"grad_norm": 5.2447967529296875,
|
| 48736 |
+
"learning_rate": 1.771854094320835e-07,
|
| 48737 |
+
"loss": 0.0425,
|
| 48738 |
+
"step": 6411
|
| 48739 |
+
},
|
| 48740 |
+
{
|
| 48741 |
+
"epoch": 0.9460715603098487,
|
| 48742 |
+
"grad_norm": 2.146310567855835,
|
| 48743 |
+
"learning_rate": 1.7622135576923495e-07,
|
| 48744 |
+
"loss": 0.0601,
|
| 48745 |
+
"step": 6412
|
| 48746 |
+
},
|
| 48747 |
+
{
|
| 48748 |
+
"epoch": 0.9462191073404648,
|
| 48749 |
+
"grad_norm": 2.063035726547241,
|
| 48750 |
+
"learning_rate": 1.7525990862624188e-07,
|
| 48751 |
+
"loss": 0.0776,
|
| 48752 |
+
"step": 6413
|
| 48753 |
+
},
|
| 48754 |
+
{
|
| 48755 |
+
"epoch": 0.9463666543710808,
|
| 48756 |
+
"grad_norm": 2.2232484817504883,
|
| 48757 |
+
"learning_rate": 1.7430106825819804e-07,
|
| 48758 |
+
"loss": 0.0306,
|
| 48759 |
+
"step": 6414
|
| 48760 |
+
},
|
| 48761 |
+
{
|
| 48762 |
+
"epoch": 0.9465142014016968,
|
| 48763 |
+
"grad_norm": 1.885646939277649,
|
| 48764 |
+
"learning_rate": 1.7334483491951327e-07,
|
| 48765 |
+
"loss": 0.0365,
|
| 48766 |
+
"step": 6415
|
| 48767 |
+
},
|
| 48768 |
+
{
|
| 48769 |
+
"epoch": 0.9466617484323128,
|
| 48770 |
+
"grad_norm": 2.823607921600342,
|
| 48771 |
+
"learning_rate": 1.7239120886390347e-07,
|
| 48772 |
+
"loss": 0.0833,
|
| 48773 |
+
"step": 6416
|
| 48774 |
+
},
|
| 48775 |
+
{
|
| 48776 |
+
"epoch": 0.9468092954629288,
|
| 48777 |
+
"grad_norm": 3.014352798461914,
|
| 48778 |
+
"learning_rate": 1.7144019034438851e-07,
|
| 48779 |
+
"loss": 0.0913,
|
| 48780 |
+
"step": 6417
|
| 48781 |
+
},
|
| 48782 |
+
{
|
| 48783 |
+
"epoch": 0.9469568424935448,
|
| 48784 |
+
"grad_norm": 2.5719082355499268,
|
| 48785 |
+
"learning_rate": 1.7049177961330432e-07,
|
| 48786 |
+
"loss": 0.0422,
|
| 48787 |
+
"step": 6418
|
| 48788 |
+
},
|
| 48789 |
+
{
|
| 48790 |
+
"epoch": 0.9471043895241609,
|
| 48791 |
+
"grad_norm": 2.7025134563446045,
|
| 48792 |
+
"learning_rate": 1.6954597692228626e-07,
|
| 48793 |
+
"loss": 0.1112,
|
| 48794 |
+
"step": 6419
|
| 48795 |
+
},
|
| 48796 |
+
{
|
| 48797 |
+
"epoch": 0.9472519365547768,
|
| 48798 |
+
"grad_norm": 1.737754464149475,
|
| 48799 |
+
"learning_rate": 1.6860278252228358e-07,
|
| 48800 |
+
"loss": 0.0323,
|
| 48801 |
+
"step": 6420
|
| 48802 |
+
},
|
| 48803 |
+
{
|
| 48804 |
+
"epoch": 0.9472519365547768,
|
| 48805 |
+
"eval_accuracy": 0.9782923299565847,
|
| 48806 |
+
"eval_f1": 0.9629629629629629,
|
| 48807 |
+
"eval_loss": 0.05500521510839462,
|
| 48808 |
+
"eval_precision": 0.9798994974874372,
|
| 48809 |
+
"eval_recall": 0.9466019417475728,
|
| 48810 |
+
"eval_runtime": 48.669,
|
| 48811 |
+
"eval_samples_per_second": 5.979,
|
| 48812 |
+
"eval_steps_per_second": 0.205,
|
| 48813 |
+
"step": 6420
|
| 48814 |
+
},
|
| 48815 |
+
{
|
| 48816 |
+
"epoch": 0.9473994835853928,
|
| 48817 |
+
"grad_norm": 2.6450870037078857,
|
| 48818 |
+
"learning_rate": 1.6766219666355278e-07,
|
| 48819 |
+
"loss": 0.0535,
|
| 48820 |
+
"step": 6421
|
| 48821 |
+
},
|
| 48822 |
+
{
|
| 48823 |
+
"epoch": 0.9475470306160089,
|
| 48824 |
+
"grad_norm": 2.7876486778259277,
|
| 48825 |
+
"learning_rate": 1.6672421959565755e-07,
|
| 48826 |
+
"loss": 0.0498,
|
| 48827 |
+
"step": 6422
|
| 48828 |
+
},
|
| 48829 |
+
{
|
| 48830 |
+
"epoch": 0.9476945776466249,
|
| 48831 |
+
"grad_norm": 4.724228858947754,
|
| 48832 |
+
"learning_rate": 1.6578885156746548e-07,
|
| 48833 |
+
"loss": 0.0836,
|
| 48834 |
+
"step": 6423
|
| 48835 |
+
},
|
| 48836 |
+
{
|
| 48837 |
+
"epoch": 0.9478421246772408,
|
| 48838 |
+
"grad_norm": 1.4518014192581177,
|
| 48839 |
+
"learning_rate": 1.648560928271592e-07,
|
| 48840 |
+
"loss": 0.0291,
|
| 48841 |
+
"step": 6424
|
| 48842 |
+
},
|
| 48843 |
+
{
|
| 48844 |
+
"epoch": 0.9479896717078569,
|
| 48845 |
+
"grad_norm": 2.3440370559692383,
|
| 48846 |
+
"learning_rate": 1.6392594362222515e-07,
|
| 48847 |
+
"loss": 0.0662,
|
| 48848 |
+
"step": 6425
|
| 48849 |
+
},
|
| 48850 |
+
{
|
| 48851 |
+
"epoch": 0.9481372187384729,
|
| 48852 |
+
"grad_norm": 1.8037214279174805,
|
| 48853 |
+
"learning_rate": 1.629984041994559e-07,
|
| 48854 |
+
"loss": 0.0443,
|
| 48855 |
+
"step": 6426
|
| 48856 |
+
},
|
| 48857 |
+
{
|
| 48858 |
+
"epoch": 0.9482847657690889,
|
| 48859 |
+
"grad_norm": 0.7664615511894226,
|
| 48860 |
+
"learning_rate": 1.6207347480495462e-07,
|
| 48861 |
+
"loss": 0.0098,
|
| 48862 |
+
"step": 6427
|
| 48863 |
+
},
|
| 48864 |
+
{
|
| 48865 |
+
"epoch": 0.9484323127997049,
|
| 48866 |
+
"grad_norm": 3.4400036334991455,
|
| 48867 |
+
"learning_rate": 1.6115115568412942e-07,
|
| 48868 |
+
"loss": 0.084,
|
| 48869 |
+
"step": 6428
|
| 48870 |
+
},
|
| 48871 |
+
{
|
| 48872 |
+
"epoch": 0.9485798598303209,
|
| 48873 |
+
"grad_norm": 2.720244884490967,
|
| 48874 |
+
"learning_rate": 1.602314470816968e-07,
|
| 48875 |
+
"loss": 0.0446,
|
| 48876 |
+
"step": 6429
|
| 48877 |
+
},
|
| 48878 |
+
{
|
| 48879 |
+
"epoch": 0.9487274068609369,
|
| 48880 |
+
"grad_norm": 1.642297625541687,
|
| 48881 |
+
"learning_rate": 1.5931434924168377e-07,
|
| 48882 |
+
"loss": 0.0146,
|
| 48883 |
+
"step": 6430
|
| 48884 |
+
},
|
| 48885 |
+
{
|
| 48886 |
+
"epoch": 0.948874953891553,
|
| 48887 |
+
"grad_norm": 2.7700729370117188,
|
| 48888 |
+
"learning_rate": 1.583998624074179e-07,
|
| 48889 |
+
"loss": 0.0629,
|
| 48890 |
+
"step": 6431
|
| 48891 |
+
},
|
| 48892 |
+
{
|
| 48893 |
+
"epoch": 0.9490225009221689,
|
| 48894 |
+
"grad_norm": 4.600953578948975,
|
| 48895 |
+
"learning_rate": 1.5748798682154177e-07,
|
| 48896 |
+
"loss": 0.1125,
|
| 48897 |
+
"step": 6432
|
| 48898 |
+
},
|
| 48899 |
+
{
|
| 48900 |
+
"epoch": 0.9491700479527849,
|
| 48901 |
+
"grad_norm": 1.555990219116211,
|
| 48902 |
+
"learning_rate": 1.5657872272599738e-07,
|
| 48903 |
+
"loss": 0.0382,
|
| 48904 |
+
"step": 6433
|
| 48905 |
+
},
|
| 48906 |
+
{
|
| 48907 |
+
"epoch": 0.949317594983401,
|
| 48908 |
+
"grad_norm": 2.383833408355713,
|
| 48909 |
+
"learning_rate": 1.5567207036203957e-07,
|
| 48910 |
+
"loss": 0.0967,
|
| 48911 |
+
"step": 6434
|
| 48912 |
+
},
|
| 48913 |
+
{
|
| 48914 |
+
"epoch": 0.949465142014017,
|
| 48915 |
+
"grad_norm": 3.316439390182495,
|
| 48916 |
+
"learning_rate": 1.5476802997022812e-07,
|
| 48917 |
+
"loss": 0.1172,
|
| 48918 |
+
"step": 6435
|
| 48919 |
+
},
|
| 48920 |
+
{
|
| 48921 |
+
"epoch": 0.949612689044633,
|
| 48922 |
+
"grad_norm": 4.898162841796875,
|
| 48923 |
+
"learning_rate": 1.538666017904311e-07,
|
| 48924 |
+
"loss": 0.1416,
|
| 48925 |
+
"step": 6436
|
| 48926 |
+
},
|
| 48927 |
+
{
|
| 48928 |
+
"epoch": 0.949760236075249,
|
| 48929 |
+
"grad_norm": 2.7083256244659424,
|
| 48930 |
+
"learning_rate": 1.5296778606181839e-07,
|
| 48931 |
+
"loss": 0.057,
|
| 48932 |
+
"step": 6437
|
| 48933 |
+
},
|
| 48934 |
+
{
|
| 48935 |
+
"epoch": 0.949907783105865,
|
| 48936 |
+
"grad_norm": 2.0090880393981934,
|
| 48937 |
+
"learning_rate": 1.5207158302287472e-07,
|
| 48938 |
+
"loss": 0.0365,
|
| 48939 |
+
"step": 6438
|
| 48940 |
+
},
|
| 48941 |
+
{
|
| 48942 |
+
"epoch": 0.950055330136481,
|
| 48943 |
+
"grad_norm": 2.323190689086914,
|
| 48944 |
+
"learning_rate": 1.5117799291138657e-07,
|
| 48945 |
+
"loss": 0.0455,
|
| 48946 |
+
"step": 6439
|
| 48947 |
+
},
|
| 48948 |
+
{
|
| 48949 |
+
"epoch": 0.9502028771670971,
|
| 48950 |
+
"grad_norm": 2.2493746280670166,
|
| 48951 |
+
"learning_rate": 1.502870159644465e-07,
|
| 48952 |
+
"loss": 0.0496,
|
| 48953 |
+
"step": 6440
|
| 48954 |
+
},
|
| 48955 |
+
{
|
| 48956 |
+
"epoch": 0.9502028771670971,
|
| 48957 |
+
"eval_accuracy": 0.9782923299565847,
|
| 48958 |
+
"eval_f1": 0.9629629629629629,
|
| 48959 |
+
"eval_loss": 0.055333010852336884,
|
| 48960 |
+
"eval_precision": 0.9798994974874372,
|
| 48961 |
+
"eval_recall": 0.9466019417475728,
|
| 48962 |
+
"eval_runtime": 49.0651,
|
| 48963 |
+
"eval_samples_per_second": 5.931,
|
| 48964 |
+
"eval_steps_per_second": 0.204,
|
| 48965 |
+
"step": 6440
|
| 48966 |
+
},
|
| 48967 |
+
{
|
| 48968 |
+
"epoch": 0.950350424197713,
|
| 48969 |
+
"grad_norm": 9.028923034667969,
|
| 48970 |
+
"learning_rate": 1.4939865241845652e-07,
|
| 48971 |
+
"loss": 0.1114,
|
| 48972 |
+
"step": 6441
|
| 48973 |
+
},
|
| 48974 |
+
{
|
| 48975 |
+
"epoch": 0.950497971228329,
|
| 48976 |
+
"grad_norm": 3.4927148818969727,
|
| 48977 |
+
"learning_rate": 1.4851290250912365e-07,
|
| 48978 |
+
"loss": 0.0462,
|
| 48979 |
+
"step": 6442
|
| 48980 |
+
},
|
| 48981 |
+
{
|
| 48982 |
+
"epoch": 0.950645518258945,
|
| 48983 |
+
"grad_norm": 1.9196674823760986,
|
| 48984 |
+
"learning_rate": 1.476297664714621e-07,
|
| 48985 |
+
"loss": 0.0741,
|
| 48986 |
+
"step": 6443
|
| 48987 |
+
},
|
| 48988 |
+
{
|
| 48989 |
+
"epoch": 0.9507930652895611,
|
| 48990 |
+
"grad_norm": 0.9236531257629395,
|
| 48991 |
+
"learning_rate": 1.4674924453979223e-07,
|
| 48992 |
+
"loss": 0.0087,
|
| 48993 |
+
"step": 6444
|
| 48994 |
+
},
|
| 48995 |
+
{
|
| 48996 |
+
"epoch": 0.950940612320177,
|
| 48997 |
+
"grad_norm": 1.8691339492797852,
|
| 48998 |
+
"learning_rate": 1.4587133694774048e-07,
|
| 48999 |
+
"loss": 0.0449,
|
| 49000 |
+
"step": 6445
|
| 49001 |
+
},
|
| 49002 |
+
{
|
| 49003 |
+
"epoch": 0.951088159350793,
|
| 49004 |
+
"grad_norm": 3.3348920345306396,
|
| 49005 |
+
"learning_rate": 1.4499604392824052e-07,
|
| 49006 |
+
"loss": 0.0528,
|
| 49007 |
+
"step": 6446
|
| 49008 |
+
},
|
| 49009 |
+
{
|
| 49010 |
+
"epoch": 0.9512357063814091,
|
| 49011 |
+
"grad_norm": 3.1573545932769775,
|
| 49012 |
+
"learning_rate": 1.4412336571353103e-07,
|
| 49013 |
+
"loss": 0.1077,
|
| 49014 |
+
"step": 6447
|
| 49015 |
+
},
|
| 49016 |
+
{
|
| 49017 |
+
"epoch": 0.9513832534120251,
|
| 49018 |
+
"grad_norm": 1.6667567491531372,
|
| 49019 |
+
"learning_rate": 1.4325330253515902e-07,
|
| 49020 |
+
"loss": 0.0289,
|
| 49021 |
+
"step": 6448
|
| 49022 |
+
},
|
| 49023 |
+
{
|
| 49024 |
+
"epoch": 0.951530800442641,
|
| 49025 |
+
"grad_norm": 2.5860416889190674,
|
| 49026 |
+
"learning_rate": 1.4238585462397536e-07,
|
| 49027 |
+
"loss": 0.076,
|
| 49028 |
+
"step": 6449
|
| 49029 |
+
},
|
| 49030 |
+
{
|
| 49031 |
+
"epoch": 0.9516783474732571,
|
| 49032 |
+
"grad_norm": 2.118489980697632,
|
| 49033 |
+
"learning_rate": 1.4152102221013708e-07,
|
| 49034 |
+
"loss": 0.0561,
|
| 49035 |
+
"step": 6450
|
| 49036 |
+
},
|
| 49037 |
+
{
|
| 49038 |
+
"epoch": 0.9518258945038731,
|
| 49039 |
+
"grad_norm": 1.936941385269165,
|
| 49040 |
+
"learning_rate": 1.4065880552310952e-07,
|
| 49041 |
+
"loss": 0.0525,
|
| 49042 |
+
"step": 6451
|
| 49043 |
+
},
|
| 49044 |
+
{
|
| 49045 |
+
"epoch": 0.9519734415344892,
|
| 49046 |
+
"grad_norm": 1.3135408163070679,
|
| 49047 |
+
"learning_rate": 1.3979920479166187e-07,
|
| 49048 |
+
"loss": 0.0513,
|
| 49049 |
+
"step": 6452
|
| 49050 |
+
},
|
| 49051 |
+
{
|
| 49052 |
+
"epoch": 0.9521209885651051,
|
| 49053 |
+
"grad_norm": 2.240351915359497,
|
| 49054 |
+
"learning_rate": 1.389422202438706e-07,
|
| 49055 |
+
"loss": 0.0541,
|
| 49056 |
+
"step": 6453
|
| 49057 |
+
},
|
| 49058 |
+
{
|
| 49059 |
+
"epoch": 0.9522685355957211,
|
| 49060 |
+
"grad_norm": 4.377660751342773,
|
| 49061 |
+
"learning_rate": 1.3808785210711606e-07,
|
| 49062 |
+
"loss": 0.0642,
|
| 49063 |
+
"step": 6454
|
| 49064 |
+
},
|
| 49065 |
+
{
|
| 49066 |
+
"epoch": 0.9524160826263371,
|
| 49067 |
+
"grad_norm": 4.16725492477417,
|
| 49068 |
+
"learning_rate": 1.3723610060808801e-07,
|
| 49069 |
+
"loss": 0.0707,
|
| 49070 |
+
"step": 6455
|
| 49071 |
+
},
|
| 49072 |
+
{
|
| 49073 |
+
"epoch": 0.9525636296569532,
|
| 49074 |
+
"grad_norm": 1.5783486366271973,
|
| 49075 |
+
"learning_rate": 1.3638696597277678e-07,
|
| 49076 |
+
"loss": 0.0347,
|
| 49077 |
+
"step": 6456
|
| 49078 |
+
},
|
| 49079 |
+
{
|
| 49080 |
+
"epoch": 0.9527111766875692,
|
| 49081 |
+
"grad_norm": 0.9976248741149902,
|
| 49082 |
+
"learning_rate": 1.3554044842648217e-07,
|
| 49083 |
+
"loss": 0.0198,
|
| 49084 |
+
"step": 6457
|
| 49085 |
+
},
|
| 49086 |
+
{
|
| 49087 |
+
"epoch": 0.9528587237181851,
|
| 49088 |
+
"grad_norm": 2.819964647293091,
|
| 49089 |
+
"learning_rate": 1.3469654819381118e-07,
|
| 49090 |
+
"loss": 0.0753,
|
| 49091 |
+
"step": 6458
|
| 49092 |
+
},
|
| 49093 |
+
{
|
| 49094 |
+
"epoch": 0.9530062707488012,
|
| 49095 |
+
"grad_norm": 0.9899864196777344,
|
| 49096 |
+
"learning_rate": 1.3385526549867022e-07,
|
| 49097 |
+
"loss": 0.013,
|
| 49098 |
+
"step": 6459
|
| 49099 |
+
},
|
| 49100 |
+
{
|
| 49101 |
+
"epoch": 0.9531538177794172,
|
| 49102 |
+
"grad_norm": 2.6590030193328857,
|
| 49103 |
+
"learning_rate": 1.3301660056427745e-07,
|
| 49104 |
+
"loss": 0.0809,
|
| 49105 |
+
"step": 6460
|
| 49106 |
+
},
|
| 49107 |
+
{
|
| 49108 |
+
"epoch": 0.9531538177794172,
|
| 49109 |
+
"eval_accuracy": 0.9782923299565847,
|
| 49110 |
+
"eval_f1": 0.9629629629629629,
|
| 49111 |
+
"eval_loss": 0.05532016232609749,
|
| 49112 |
+
"eval_precision": 0.9798994974874372,
|
| 49113 |
+
"eval_recall": 0.9466019417475728,
|
| 49114 |
+
"eval_runtime": 48.7081,
|
| 49115 |
+
"eval_samples_per_second": 5.974,
|
| 49116 |
+
"eval_steps_per_second": 0.205,
|
| 49117 |
+
"step": 6460
|
| 49118 |
+
},
|
| 49119 |
+
{
|
| 49120 |
+
"epoch": 0.9533013648100332,
|
| 49121 |
+
"grad_norm": 2.229666233062744,
|
| 49122 |
+
"learning_rate": 1.3218055361315262e-07,
|
| 49123 |
+
"loss": 0.0684,
|
| 49124 |
+
"step": 6461
|
| 49125 |
+
},
|
| 49126 |
+
{
|
| 49127 |
+
"epoch": 0.9534489118406492,
|
| 49128 |
+
"grad_norm": 2.2530157566070557,
|
| 49129 |
+
"learning_rate": 1.3134712486712165e-07,
|
| 49130 |
+
"loss": 0.0549,
|
| 49131 |
+
"step": 6462
|
| 49132 |
+
},
|
| 49133 |
+
{
|
| 49134 |
+
"epoch": 0.9535964588712652,
|
| 49135 |
+
"grad_norm": 2.005972146987915,
|
| 49136 |
+
"learning_rate": 1.3051631454731873e-07,
|
| 49137 |
+
"loss": 0.0462,
|
| 49138 |
+
"step": 6463
|
| 49139 |
+
},
|
| 49140 |
+
{
|
| 49141 |
+
"epoch": 0.9537440059018812,
|
| 49142 |
+
"grad_norm": 3.3792154788970947,
|
| 49143 |
+
"learning_rate": 1.2968812287417753e-07,
|
| 49144 |
+
"loss": 0.1276,
|
| 49145 |
+
"step": 6464
|
| 49146 |
+
},
|
| 49147 |
+
{
|
| 49148 |
+
"epoch": 0.9538915529324973,
|
| 49149 |
+
"grad_norm": 3.6405813694000244,
|
| 49150 |
+
"learning_rate": 1.288625500674412e-07,
|
| 49151 |
+
"loss": 0.0681,
|
| 49152 |
+
"step": 6465
|
| 49153 |
+
},
|
| 49154 |
+
{
|
| 49155 |
+
"epoch": 0.9540390999631132,
|
| 49156 |
+
"grad_norm": 1.7894163131713867,
|
| 49157 |
+
"learning_rate": 1.2803959634615782e-07,
|
| 49158 |
+
"loss": 0.0496,
|
| 49159 |
+
"step": 6466
|
| 49160 |
+
},
|
| 49161 |
+
{
|
| 49162 |
+
"epoch": 0.9541866469937292,
|
| 49163 |
+
"grad_norm": 0.9998230934143066,
|
| 49164 |
+
"learning_rate": 1.272192619286805e-07,
|
| 49165 |
+
"loss": 0.0204,
|
| 49166 |
+
"step": 6467
|
| 49167 |
+
},
|
| 49168 |
+
{
|
| 49169 |
+
"epoch": 0.9543341940243453,
|
| 49170 |
+
"grad_norm": 2.581663131713867,
|
| 49171 |
+
"learning_rate": 1.2640154703266405e-07,
|
| 49172 |
+
"loss": 0.0972,
|
| 49173 |
+
"step": 6468
|
| 49174 |
+
},
|
| 49175 |
+
{
|
| 49176 |
+
"epoch": 0.9544817410549613,
|
| 49177 |
+
"grad_norm": 1.5313342809677124,
|
| 49178 |
+
"learning_rate": 1.2558645187507267e-07,
|
| 49179 |
+
"loss": 0.0412,
|
| 49180 |
+
"step": 6469
|
| 49181 |
+
},
|
| 49182 |
+
{
|
| 49183 |
+
"epoch": 0.9546292880855772,
|
| 49184 |
+
"grad_norm": 2.740792751312256,
|
| 49185 |
+
"learning_rate": 1.247739766721734e-07,
|
| 49186 |
+
"loss": 0.0527,
|
| 49187 |
+
"step": 6470
|
| 49188 |
+
},
|
| 49189 |
+
{
|
| 49190 |
+
"epoch": 0.9547768351161933,
|
| 49191 |
+
"grad_norm": 1.9213531017303467,
|
| 49192 |
+
"learning_rate": 1.2396412163953709e-07,
|
| 49193 |
+
"loss": 0.0462,
|
| 49194 |
+
"step": 6471
|
| 49195 |
+
},
|
| 49196 |
+
{
|
| 49197 |
+
"epoch": 0.9549243821468093,
|
| 49198 |
+
"grad_norm": 2.4002788066864014,
|
| 49199 |
+
"learning_rate": 1.2315688699204298e-07,
|
| 49200 |
+
"loss": 0.0869,
|
| 49201 |
+
"step": 6472
|
| 49202 |
+
},
|
| 49203 |
+
{
|
| 49204 |
+
"epoch": 0.9550719291774253,
|
| 49205 |
+
"grad_norm": 4.3059186935424805,
|
| 49206 |
+
"learning_rate": 1.2235227294387085e-07,
|
| 49207 |
+
"loss": 0.0794,
|
| 49208 |
+
"step": 6473
|
| 49209 |
+
},
|
| 49210 |
+
{
|
| 49211 |
+
"epoch": 0.9552194762080413,
|
| 49212 |
+
"grad_norm": 2.3202598094940186,
|
| 49213 |
+
"learning_rate": 1.2155027970850776e-07,
|
| 49214 |
+
"loss": 0.0262,
|
| 49215 |
+
"step": 6474
|
| 49216 |
+
},
|
| 49217 |
+
{
|
| 49218 |
+
"epoch": 0.9553670232386573,
|
| 49219 |
+
"grad_norm": 2.168534517288208,
|
| 49220 |
+
"learning_rate": 1.2075090749874451e-07,
|
| 49221 |
+
"loss": 0.0299,
|
| 49222 |
+
"step": 6475
|
| 49223 |
+
},
|
| 49224 |
+
{
|
| 49225 |
+
"epoch": 0.9555145702692733,
|
| 49226 |
+
"grad_norm": 4.975533485412598,
|
| 49227 |
+
"learning_rate": 1.1995415652667598e-07,
|
| 49228 |
+
"loss": 0.1115,
|
| 49229 |
+
"step": 6476
|
| 49230 |
+
},
|
| 49231 |
+
{
|
| 49232 |
+
"epoch": 0.9556621172998894,
|
| 49233 |
+
"grad_norm": 2.08109450340271,
|
| 49234 |
+
"learning_rate": 1.1916002700370411e-07,
|
| 49235 |
+
"loss": 0.0311,
|
| 49236 |
+
"step": 6477
|
| 49237 |
+
},
|
| 49238 |
+
{
|
| 49239 |
+
"epoch": 0.9558096643305053,
|
| 49240 |
+
"grad_norm": 3.003537654876709,
|
| 49241 |
+
"learning_rate": 1.183685191405315e-07,
|
| 49242 |
+
"loss": 0.033,
|
| 49243 |
+
"step": 6478
|
| 49244 |
+
},
|
| 49245 |
+
{
|
| 49246 |
+
"epoch": 0.9559572113611213,
|
| 49247 |
+
"grad_norm": 1.9444817304611206,
|
| 49248 |
+
"learning_rate": 1.1757963314716791e-07,
|
| 49249 |
+
"loss": 0.051,
|
| 49250 |
+
"step": 6479
|
| 49251 |
+
},
|
| 49252 |
+
{
|
| 49253 |
+
"epoch": 0.9561047583917374,
|
| 49254 |
+
"grad_norm": 2.8040788173675537,
|
| 49255 |
+
"learning_rate": 1.1679336923292594e-07,
|
| 49256 |
+
"loss": 0.0706,
|
| 49257 |
+
"step": 6480
|
| 49258 |
+
},
|
| 49259 |
+
{
|
| 49260 |
+
"epoch": 0.9561047583917374,
|
| 49261 |
+
"eval_accuracy": 0.9797395079594791,
|
| 49262 |
+
"eval_f1": 0.9653465346534653,
|
| 49263 |
+
"eval_loss": 0.05492851138114929,
|
| 49264 |
+
"eval_precision": 0.9848484848484849,
|
| 49265 |
+
"eval_recall": 0.9466019417475728,
|
| 49266 |
+
"eval_runtime": 48.3996,
|
| 49267 |
+
"eval_samples_per_second": 6.012,
|
| 49268 |
+
"eval_steps_per_second": 0.207,
|
| 49269 |
+
"step": 6480
|
| 49270 |
+
},
|
| 49271 |
+
{
|
| 49272 |
+
"epoch": 0.9562523054223534,
|
| 49273 |
+
"grad_norm": 5.251060485839844,
|
| 49274 |
+
"learning_rate": 1.1600972760642426e-07,
|
| 49275 |
+
"loss": 0.0943,
|
| 49276 |
+
"step": 6481
|
| 49277 |
+
},
|
| 49278 |
+
{
|
| 49279 |
+
"epoch": 0.9563998524529694,
|
| 49280 |
+
"grad_norm": 2.8643219470977783,
|
| 49281 |
+
"learning_rate": 1.1522870847558432e-07,
|
| 49282 |
+
"loss": 0.0305,
|
| 49283 |
+
"step": 6482
|
| 49284 |
+
},
|
| 49285 |
+
{
|
| 49286 |
+
"epoch": 0.9565473994835854,
|
| 49287 |
+
"grad_norm": 3.2723355293273926,
|
| 49288 |
+
"learning_rate": 1.1445031204763146e-07,
|
| 49289 |
+
"loss": 0.0637,
|
| 49290 |
+
"step": 6483
|
| 49291 |
+
},
|
| 49292 |
+
{
|
| 49293 |
+
"epoch": 0.9566949465142014,
|
| 49294 |
+
"grad_norm": 3.418752908706665,
|
| 49295 |
+
"learning_rate": 1.1367453852909493e-07,
|
| 49296 |
+
"loss": 0.0994,
|
| 49297 |
+
"step": 6484
|
| 49298 |
+
},
|
| 49299 |
+
{
|
| 49300 |
+
"epoch": 0.9568424935448174,
|
| 49301 |
+
"grad_norm": 0.9884876608848572,
|
| 49302 |
+
"learning_rate": 1.1290138812581009e-07,
|
| 49303 |
+
"loss": 0.0218,
|
| 49304 |
+
"step": 6485
|
| 49305 |
+
},
|
| 49306 |
+
{
|
| 49307 |
+
"epoch": 0.9569900405754335,
|
| 49308 |
+
"grad_norm": 2.2045938968658447,
|
| 49309 |
+
"learning_rate": 1.1213086104291615e-07,
|
| 49310 |
+
"loss": 0.032,
|
| 49311 |
+
"step": 6486
|
| 49312 |
+
},
|
| 49313 |
+
{
|
| 49314 |
+
"epoch": 0.9571375876060494,
|
| 49315 |
+
"grad_norm": 2.34030818939209,
|
| 49316 |
+
"learning_rate": 1.1136295748485293e-07,
|
| 49317 |
+
"loss": 0.0321,
|
| 49318 |
+
"step": 6487
|
| 49319 |
+
},
|
| 49320 |
+
{
|
| 49321 |
+
"epoch": 0.9572851346366654,
|
| 49322 |
+
"grad_norm": 2.480587959289551,
|
| 49323 |
+
"learning_rate": 1.1059767765536856e-07,
|
| 49324 |
+
"loss": 0.0331,
|
| 49325 |
+
"step": 6488
|
| 49326 |
+
},
|
| 49327 |
+
{
|
| 49328 |
+
"epoch": 0.9574326816672815,
|
| 49329 |
+
"grad_norm": 2.711378574371338,
|
| 49330 |
+
"learning_rate": 1.098350217575117e-07,
|
| 49331 |
+
"loss": 0.0592,
|
| 49332 |
+
"step": 6489
|
| 49333 |
+
},
|
| 49334 |
+
{
|
| 49335 |
+
"epoch": 0.9575802286978975,
|
| 49336 |
+
"grad_norm": 1.1604716777801514,
|
| 49337 |
+
"learning_rate": 1.0907498999363609e-07,
|
| 49338 |
+
"loss": 0.015,
|
| 49339 |
+
"step": 6490
|
| 49340 |
+
},
|
| 49341 |
+
{
|
| 49342 |
+
"epoch": 0.9577277757285134,
|
| 49343 |
+
"grad_norm": 1.9541465044021606,
|
| 49344 |
+
"learning_rate": 1.0831758256539925e-07,
|
| 49345 |
+
"loss": 0.043,
|
| 49346 |
+
"step": 6491
|
| 49347 |
+
},
|
| 49348 |
+
{
|
| 49349 |
+
"epoch": 0.9578753227591295,
|
| 49350 |
+
"grad_norm": 6.780413627624512,
|
| 49351 |
+
"learning_rate": 1.075627996737627e-07,
|
| 49352 |
+
"loss": 0.0828,
|
| 49353 |
+
"step": 6492
|
| 49354 |
+
},
|
| 49355 |
+
{
|
| 49356 |
+
"epoch": 0.9580228697897455,
|
| 49357 |
+
"grad_norm": 0.8017694354057312,
|
| 49358 |
+
"learning_rate": 1.0681064151899068e-07,
|
| 49359 |
+
"loss": 0.0084,
|
| 49360 |
+
"step": 6493
|
| 49361 |
+
},
|
| 49362 |
+
{
|
| 49363 |
+
"epoch": 0.9581704168203615,
|
| 49364 |
+
"grad_norm": 1.1130073070526123,
|
| 49365 |
+
"learning_rate": 1.0606110830065131e-07,
|
| 49366 |
+
"loss": 0.011,
|
| 49367 |
+
"step": 6494
|
| 49368 |
+
},
|
| 49369 |
+
{
|
| 49370 |
+
"epoch": 0.9583179638509775,
|
| 49371 |
+
"grad_norm": 2.2371890544891357,
|
| 49372 |
+
"learning_rate": 1.0531420021761662e-07,
|
| 49373 |
+
"loss": 0.0579,
|
| 49374 |
+
"step": 6495
|
| 49375 |
+
},
|
| 49376 |
+
{
|
| 49377 |
+
"epoch": 0.9584655108815935,
|
| 49378 |
+
"grad_norm": 2.2667269706726074,
|
| 49379 |
+
"learning_rate": 1.0456991746806366e-07,
|
| 49380 |
+
"loss": 0.03,
|
| 49381 |
+
"step": 6496
|
| 49382 |
+
},
|
| 49383 |
+
{
|
| 49384 |
+
"epoch": 0.9586130579122095,
|
| 49385 |
+
"grad_norm": 0.9488065242767334,
|
| 49386 |
+
"learning_rate": 1.0382826024946891e-07,
|
| 49387 |
+
"loss": 0.0154,
|
| 49388 |
+
"step": 6497
|
| 49389 |
+
},
|
| 49390 |
+
{
|
| 49391 |
+
"epoch": 0.9587606049428256,
|
| 49392 |
+
"grad_norm": 2.35026478767395,
|
| 49393 |
+
"learning_rate": 1.0308922875861493e-07,
|
| 49394 |
+
"loss": 0.0377,
|
| 49395 |
+
"step": 6498
|
| 49396 |
+
},
|
| 49397 |
+
{
|
| 49398 |
+
"epoch": 0.9589081519734415,
|
| 49399 |
+
"grad_norm": 1.9522687196731567,
|
| 49400 |
+
"learning_rate": 1.0235282319158823e-07,
|
| 49401 |
+
"loss": 0.0769,
|
| 49402 |
+
"step": 6499
|
| 49403 |
+
},
|
| 49404 |
+
{
|
| 49405 |
+
"epoch": 0.9590556990040575,
|
| 49406 |
+
"grad_norm": 4.399130344390869,
|
| 49407 |
+
"learning_rate": 1.0161904374377696e-07,
|
| 49408 |
+
"loss": 0.0716,
|
| 49409 |
+
"step": 6500
|
| 49410 |
+
},
|
| 49411 |
+
{
|
| 49412 |
+
"epoch": 0.9590556990040575,
|
| 49413 |
+
"eval_accuracy": 0.9782923299565847,
|
| 49414 |
+
"eval_f1": 0.9629629629629629,
|
| 49415 |
+
"eval_loss": 0.05545896664261818,
|
| 49416 |
+
"eval_precision": 0.9798994974874372,
|
| 49417 |
+
"eval_recall": 0.9466019417475728,
|
| 49418 |
+
"eval_runtime": 50.2134,
|
| 49419 |
+
"eval_samples_per_second": 5.795,
|
| 49420 |
+
"eval_steps_per_second": 0.199,
|
| 49421 |
+
"step": 6500
|
| 49422 |
}
|
| 49423 |
],
|
| 49424 |
"logging_steps": 1,
|
|
|
|
| 49438 |
"attributes": {}
|
| 49439 |
}
|
| 49440 |
},
|
| 49441 |
+
"total_flos": 2.0029445559969382e+18,
|
| 49442 |
"train_batch_size": 8,
|
| 49443 |
"trial_name": null,
|
| 49444 |
"trial_params": null
|