Training in progress, step 6300, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13934748
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b28a8c69423684ee4c64da8962a7bfc59ba0c98b1b135f97d468efb2d682b7f3
|
| 3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13999412
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:256356e5f5f129661266fd2ec5986d64e8a618f50386558442d8fd5e211f9d75
|
| 3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4400418753cdae533886a325d8574dc0fd9e84c371d8423f3b0575671aff9b5
|
| 3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6d0003531b8a67ee8629b1863a22b3c8772704ff5ae56a9428b25b3f9af27ca
|
| 3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d59b6204db24eaafdf19a89c40f08932737a129af907b8fa01e86a38e864b7b
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a488e42a6c1233774282544efdbb895b44374f17a7953d74ea138b797268fdd1
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a157099efd1a2813560e813b422c6d600f68c33a2bb205d7f3a61370a041b79
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd7f94bcc3a523e515db8e62f1b61f8f766e6f97044ede3fb1d022d6fec18097
|
| 3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f80073c48585f31ea8d8b021958a20a34c2dfc7e8e8ec02b7ace68d8369bd89d
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae3b6324078ae2ab8d58a5fe3558de31400b69d699a72fa9072c4fd896d7f841
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eeb00a30bd3348fef7fa7a0dc88bf9a7a5a32f4484761a26220beef20b2e2ee5
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95a2a891e4d47fc182ed74e57aef0f749cc61efcda057957b66e209db024a9f5
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f98eaae48265d25e6b8b613f21a112d74712c3c7822c1f5228bd295d2e702437
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -47139,6 +47139,766 @@
|
|
| 47139 |
"eval_samples_per_second": 5.842,
|
| 47140 |
"eval_steps_per_second": 0.201,
|
| 47141 |
"step": 6200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47142 |
}
|
| 47143 |
],
|
| 47144 |
"logging_steps": 1,
|
|
@@ -47158,7 +47918,7 @@
|
|
| 47158 |
"attributes": {}
|
| 47159 |
}
|
| 47160 |
},
|
| 47161 |
-
"total_flos": 1.
|
| 47162 |
"train_batch_size": 8,
|
| 47163 |
"trial_name": null,
|
| 47164 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.9295462928808558,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 6300,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 47139 |
"eval_samples_per_second": 5.842,
|
| 47140 |
"eval_steps_per_second": 0.201,
|
| 47141 |
"step": 6200
|
| 47142 |
+
},
|
| 47143 |
+
{
|
| 47144 |
+
"epoch": 0.9149391368498709,
|
| 47145 |
+
"grad_norm": 1.5670089721679688,
|
| 47146 |
+
"learning_rate": 4.369273406291519e-07,
|
| 47147 |
+
"loss": 0.0568,
|
| 47148 |
+
"step": 6201
|
| 47149 |
+
},
|
| 47150 |
+
{
|
| 47151 |
+
"epoch": 0.9150866838804869,
|
| 47152 |
+
"grad_norm": 1.30291748046875,
|
| 47153 |
+
"learning_rate": 4.354226460843414e-07,
|
| 47154 |
+
"loss": 0.0265,
|
| 47155 |
+
"step": 6202
|
| 47156 |
+
},
|
| 47157 |
+
{
|
| 47158 |
+
"epoch": 0.9152342309111029,
|
| 47159 |
+
"grad_norm": 1.8413887023925781,
|
| 47160 |
+
"learning_rate": 4.339204892861215e-07,
|
| 47161 |
+
"loss": 0.0398,
|
| 47162 |
+
"step": 6203
|
| 47163 |
+
},
|
| 47164 |
+
{
|
| 47165 |
+
"epoch": 0.9153817779417189,
|
| 47166 |
+
"grad_norm": 2.272320032119751,
|
| 47167 |
+
"learning_rate": 4.3242087063305684e-07,
|
| 47168 |
+
"loss": 0.1022,
|
| 47169 |
+
"step": 6204
|
| 47170 |
+
},
|
| 47171 |
+
{
|
| 47172 |
+
"epoch": 0.915529324972335,
|
| 47173 |
+
"grad_norm": 2.3131356239318848,
|
| 47174 |
+
"learning_rate": 4.3092379052303457e-07,
|
| 47175 |
+
"loss": 0.0353,
|
| 47176 |
+
"step": 6205
|
| 47177 |
+
},
|
| 47178 |
+
{
|
| 47179 |
+
"epoch": 0.9156768720029509,
|
| 47180 |
+
"grad_norm": 4.904222011566162,
|
| 47181 |
+
"learning_rate": 4.294292493532737e-07,
|
| 47182 |
+
"loss": 0.0836,
|
| 47183 |
+
"step": 6206
|
| 47184 |
+
},
|
| 47185 |
+
{
|
| 47186 |
+
"epoch": 0.9158244190335669,
|
| 47187 |
+
"grad_norm": 1.8545290231704712,
|
| 47188 |
+
"learning_rate": 4.2793724752031807e-07,
|
| 47189 |
+
"loss": 0.0445,
|
| 47190 |
+
"step": 6207
|
| 47191 |
+
},
|
| 47192 |
+
{
|
| 47193 |
+
"epoch": 0.915971966064183,
|
| 47194 |
+
"grad_norm": 4.446885585784912,
|
| 47195 |
+
"learning_rate": 4.264477854200366e-07,
|
| 47196 |
+
"loss": 0.065,
|
| 47197 |
+
"step": 6208
|
| 47198 |
+
},
|
| 47199 |
+
{
|
| 47200 |
+
"epoch": 0.916119513094799,
|
| 47201 |
+
"grad_norm": 0.9898476004600525,
|
| 47202 |
+
"learning_rate": 4.24960863447621e-07,
|
| 47203 |
+
"loss": 0.0215,
|
| 47204 |
+
"step": 6209
|
| 47205 |
+
},
|
| 47206 |
+
{
|
| 47207 |
+
"epoch": 0.9162670601254149,
|
| 47208 |
+
"grad_norm": 2.3899457454681396,
|
| 47209 |
+
"learning_rate": 4.2347648199759784e-07,
|
| 47210 |
+
"loss": 0.0282,
|
| 47211 |
+
"step": 6210
|
| 47212 |
+
},
|
| 47213 |
+
{
|
| 47214 |
+
"epoch": 0.916414607156031,
|
| 47215 |
+
"grad_norm": 1.5149897336959839,
|
| 47216 |
+
"learning_rate": 4.219946414638132e-07,
|
| 47217 |
+
"loss": 0.0236,
|
| 47218 |
+
"step": 6211
|
| 47219 |
+
},
|
| 47220 |
+
{
|
| 47221 |
+
"epoch": 0.916562154186647,
|
| 47222 |
+
"grad_norm": 2.0021674633026123,
|
| 47223 |
+
"learning_rate": 4.205153422394381e-07,
|
| 47224 |
+
"loss": 0.0429,
|
| 47225 |
+
"step": 6212
|
| 47226 |
+
},
|
| 47227 |
+
{
|
| 47228 |
+
"epoch": 0.916709701217263,
|
| 47229 |
+
"grad_norm": 3.0172886848449707,
|
| 47230 |
+
"learning_rate": 4.1903858471697424e-07,
|
| 47231 |
+
"loss": 0.0445,
|
| 47232 |
+
"step": 6213
|
| 47233 |
+
},
|
| 47234 |
+
{
|
| 47235 |
+
"epoch": 0.9168572482478791,
|
| 47236 |
+
"grad_norm": 4.3301310539245605,
|
| 47237 |
+
"learning_rate": 4.175643692882436e-07,
|
| 47238 |
+
"loss": 0.1072,
|
| 47239 |
+
"step": 6214
|
| 47240 |
+
},
|
| 47241 |
+
{
|
| 47242 |
+
"epoch": 0.917004795278495,
|
| 47243 |
+
"grad_norm": 3.8663575649261475,
|
| 47244 |
+
"learning_rate": 4.160926963443979e-07,
|
| 47245 |
+
"loss": 0.0675,
|
| 47246 |
+
"step": 6215
|
| 47247 |
+
},
|
| 47248 |
+
{
|
| 47249 |
+
"epoch": 0.917152342309111,
|
| 47250 |
+
"grad_norm": 1.8558573722839355,
|
| 47251 |
+
"learning_rate": 4.1462356627591236e-07,
|
| 47252 |
+
"loss": 0.0268,
|
| 47253 |
+
"step": 6216
|
| 47254 |
+
},
|
| 47255 |
+
{
|
| 47256 |
+
"epoch": 0.9172998893397271,
|
| 47257 |
+
"grad_norm": 1.4661530256271362,
|
| 47258 |
+
"learning_rate": 4.131569794725876e-07,
|
| 47259 |
+
"loss": 0.0256,
|
| 47260 |
+
"step": 6217
|
| 47261 |
+
},
|
| 47262 |
+
{
|
| 47263 |
+
"epoch": 0.9174474363703431,
|
| 47264 |
+
"grad_norm": 1.4858981370925903,
|
| 47265 |
+
"learning_rate": 4.1169293632355114e-07,
|
| 47266 |
+
"loss": 0.0414,
|
| 47267 |
+
"step": 6218
|
| 47268 |
+
},
|
| 47269 |
+
{
|
| 47270 |
+
"epoch": 0.917594983400959,
|
| 47271 |
+
"grad_norm": 4.485099792480469,
|
| 47272 |
+
"learning_rate": 4.1023143721725e-07,
|
| 47273 |
+
"loss": 0.0801,
|
| 47274 |
+
"step": 6219
|
| 47275 |
+
},
|
| 47276 |
+
{
|
| 47277 |
+
"epoch": 0.917742530431575,
|
| 47278 |
+
"grad_norm": 3.052419662475586,
|
| 47279 |
+
"learning_rate": 4.0877248254146404e-07,
|
| 47280 |
+
"loss": 0.0737,
|
| 47281 |
+
"step": 6220
|
| 47282 |
+
},
|
| 47283 |
+
{
|
| 47284 |
+
"epoch": 0.917742530431575,
|
| 47285 |
+
"eval_accuracy": 0.9797395079594791,
|
| 47286 |
+
"eval_f1": 0.9653465346534653,
|
| 47287 |
+
"eval_loss": 0.05517810955643654,
|
| 47288 |
+
"eval_precision": 0.9848484848484849,
|
| 47289 |
+
"eval_recall": 0.9466019417475728,
|
| 47290 |
+
"eval_runtime": 50.2014,
|
| 47291 |
+
"eval_samples_per_second": 5.797,
|
| 47292 |
+
"eval_steps_per_second": 0.199,
|
| 47293 |
+
"step": 6220
|
| 47294 |
+
},
|
| 47295 |
+
{
|
| 47296 |
+
"epoch": 0.9178900774621911,
|
| 47297 |
+
"grad_norm": 2.319796562194824,
|
| 47298 |
+
"learning_rate": 4.0731607268329477e-07,
|
| 47299 |
+
"loss": 0.0639,
|
| 47300 |
+
"step": 6221
|
| 47301 |
+
},
|
| 47302 |
+
{
|
| 47303 |
+
"epoch": 0.9180376244928071,
|
| 47304 |
+
"grad_norm": 2.6878068447113037,
|
| 47305 |
+
"learning_rate": 4.058622080291652e-07,
|
| 47306 |
+
"loss": 0.0571,
|
| 47307 |
+
"step": 6222
|
| 47308 |
+
},
|
| 47309 |
+
{
|
| 47310 |
+
"epoch": 0.918185171523423,
|
| 47311 |
+
"grad_norm": 2.0226361751556396,
|
| 47312 |
+
"learning_rate": 4.0441088896482574e-07,
|
| 47313 |
+
"loss": 0.0681,
|
| 47314 |
+
"step": 6223
|
| 47315 |
+
},
|
| 47316 |
+
{
|
| 47317 |
+
"epoch": 0.9183327185540391,
|
| 47318 |
+
"grad_norm": 1.9138096570968628,
|
| 47319 |
+
"learning_rate": 4.029621158753538e-07,
|
| 47320 |
+
"loss": 0.0566,
|
| 47321 |
+
"step": 6224
|
| 47322 |
+
},
|
| 47323 |
+
{
|
| 47324 |
+
"epoch": 0.9184802655846551,
|
| 47325 |
+
"grad_norm": 2.777580976486206,
|
| 47326 |
+
"learning_rate": 4.015158891451476e-07,
|
| 47327 |
+
"loss": 0.0417,
|
| 47328 |
+
"step": 6225
|
| 47329 |
+
},
|
| 47330 |
+
{
|
| 47331 |
+
"epoch": 0.9186278126152712,
|
| 47332 |
+
"grad_norm": 1.0402213335037231,
|
| 47333 |
+
"learning_rate": 4.000722091579301e-07,
|
| 47334 |
+
"loss": 0.0174,
|
| 47335 |
+
"step": 6226
|
| 47336 |
+
},
|
| 47337 |
+
{
|
| 47338 |
+
"epoch": 0.9187753596458871,
|
| 47339 |
+
"grad_norm": 3.61226749420166,
|
| 47340 |
+
"learning_rate": 3.986310762967527e-07,
|
| 47341 |
+
"loss": 0.0224,
|
| 47342 |
+
"step": 6227
|
| 47343 |
+
},
|
| 47344 |
+
{
|
| 47345 |
+
"epoch": 0.9189229066765031,
|
| 47346 |
+
"grad_norm": 1.522729516029358,
|
| 47347 |
+
"learning_rate": 3.971924909439828e-07,
|
| 47348 |
+
"loss": 0.052,
|
| 47349 |
+
"step": 6228
|
| 47350 |
+
},
|
| 47351 |
+
{
|
| 47352 |
+
"epoch": 0.9190704537071192,
|
| 47353 |
+
"grad_norm": 3.099111557006836,
|
| 47354 |
+
"learning_rate": 3.9575645348132074e-07,
|
| 47355 |
+
"loss": 0.1204,
|
| 47356 |
+
"step": 6229
|
| 47357 |
+
},
|
| 47358 |
+
{
|
| 47359 |
+
"epoch": 0.9192180007377352,
|
| 47360 |
+
"grad_norm": 1.7007861137390137,
|
| 47361 |
+
"learning_rate": 3.943229642897861e-07,
|
| 47362 |
+
"loss": 0.0405,
|
| 47363 |
+
"step": 6230
|
| 47364 |
+
},
|
| 47365 |
+
{
|
| 47366 |
+
"epoch": 0.9193655477683511,
|
| 47367 |
+
"grad_norm": 5.425076484680176,
|
| 47368 |
+
"learning_rate": 3.9289202374972247e-07,
|
| 47369 |
+
"loss": 0.1331,
|
| 47370 |
+
"step": 6231
|
| 47371 |
+
},
|
| 47372 |
+
{
|
| 47373 |
+
"epoch": 0.9195130947989671,
|
| 47374 |
+
"grad_norm": 3.1894729137420654,
|
| 47375 |
+
"learning_rate": 3.9146363224079943e-07,
|
| 47376 |
+
"loss": 0.1074,
|
| 47377 |
+
"step": 6232
|
| 47378 |
+
},
|
| 47379 |
+
{
|
| 47380 |
+
"epoch": 0.9196606418295832,
|
| 47381 |
+
"grad_norm": 4.616338729858398,
|
| 47382 |
+
"learning_rate": 3.90037790142006e-07,
|
| 47383 |
+
"loss": 0.0738,
|
| 47384 |
+
"step": 6233
|
| 47385 |
+
},
|
| 47386 |
+
{
|
| 47387 |
+
"epoch": 0.9198081888601992,
|
| 47388 |
+
"grad_norm": 2.9994616508483887,
|
| 47389 |
+
"learning_rate": 3.886144978316586e-07,
|
| 47390 |
+
"loss": 0.1025,
|
| 47391 |
+
"step": 6234
|
| 47392 |
+
},
|
| 47393 |
+
{
|
| 47394 |
+
"epoch": 0.9199557358908153,
|
| 47395 |
+
"grad_norm": 3.527212142944336,
|
| 47396 |
+
"learning_rate": 3.8719375568739834e-07,
|
| 47397 |
+
"loss": 0.0572,
|
| 47398 |
+
"step": 6235
|
| 47399 |
+
},
|
| 47400 |
+
{
|
| 47401 |
+
"epoch": 0.9201032829214312,
|
| 47402 |
+
"grad_norm": 1.8691866397857666,
|
| 47403 |
+
"learning_rate": 3.8577556408618487e-07,
|
| 47404 |
+
"loss": 0.0501,
|
| 47405 |
+
"step": 6236
|
| 47406 |
+
},
|
| 47407 |
+
{
|
| 47408 |
+
"epoch": 0.9202508299520472,
|
| 47409 |
+
"grad_norm": 3.163731813430786,
|
| 47410 |
+
"learning_rate": 3.8435992340430383e-07,
|
| 47411 |
+
"loss": 0.0944,
|
| 47412 |
+
"step": 6237
|
| 47413 |
+
},
|
| 47414 |
+
{
|
| 47415 |
+
"epoch": 0.9203983769826632,
|
| 47416 |
+
"grad_norm": 2.161836862564087,
|
| 47417 |
+
"learning_rate": 3.829468340173637e-07,
|
| 47418 |
+
"loss": 0.0457,
|
| 47419 |
+
"step": 6238
|
| 47420 |
+
},
|
| 47421 |
+
{
|
| 47422 |
+
"epoch": 0.9205459240132793,
|
| 47423 |
+
"grad_norm": 3.0532407760620117,
|
| 47424 |
+
"learning_rate": 3.8153629630029666e-07,
|
| 47425 |
+
"loss": 0.0415,
|
| 47426 |
+
"step": 6239
|
| 47427 |
+
},
|
| 47428 |
+
{
|
| 47429 |
+
"epoch": 0.9206934710438952,
|
| 47430 |
+
"grad_norm": 2.5379703044891357,
|
| 47431 |
+
"learning_rate": 3.80128310627359e-07,
|
| 47432 |
+
"loss": 0.042,
|
| 47433 |
+
"step": 6240
|
| 47434 |
+
},
|
| 47435 |
+
{
|
| 47436 |
+
"epoch": 0.9206934710438952,
|
| 47437 |
+
"eval_accuracy": 0.9782923299565847,
|
| 47438 |
+
"eval_f1": 0.9629629629629629,
|
| 47439 |
+
"eval_loss": 0.0559084378182888,
|
| 47440 |
+
"eval_precision": 0.9798994974874372,
|
| 47441 |
+
"eval_recall": 0.9466019417475728,
|
| 47442 |
+
"eval_runtime": 49.6899,
|
| 47443 |
+
"eval_samples_per_second": 5.856,
|
| 47444 |
+
"eval_steps_per_second": 0.201,
|
| 47445 |
+
"step": 6240
|
| 47446 |
+
},
|
| 47447 |
+
{
|
| 47448 |
+
"epoch": 0.9208410180745112,
|
| 47449 |
+
"grad_norm": 2.550798177719116,
|
| 47450 |
+
"learning_rate": 3.787228773721252e-07,
|
| 47451 |
+
"loss": 0.1024,
|
| 47452 |
+
"step": 6241
|
| 47453 |
+
},
|
| 47454 |
+
{
|
| 47455 |
+
"epoch": 0.9209885651051273,
|
| 47456 |
+
"grad_norm": 5.740802764892578,
|
| 47457 |
+
"learning_rate": 3.773199969074959e-07,
|
| 47458 |
+
"loss": 0.0571,
|
| 47459 |
+
"step": 6242
|
| 47460 |
+
},
|
| 47461 |
+
{
|
| 47462 |
+
"epoch": 0.9211361121357433,
|
| 47463 |
+
"grad_norm": 3.259659767150879,
|
| 47464 |
+
"learning_rate": 3.759196696056955e-07,
|
| 47465 |
+
"loss": 0.0458,
|
| 47466 |
+
"step": 6243
|
| 47467 |
+
},
|
| 47468 |
+
{
|
| 47469 |
+
"epoch": 0.9212836591663592,
|
| 47470 |
+
"grad_norm": 1.5455894470214844,
|
| 47471 |
+
"learning_rate": 3.7452189583827017e-07,
|
| 47472 |
+
"loss": 0.0435,
|
| 47473 |
+
"step": 6244
|
| 47474 |
+
},
|
| 47475 |
+
{
|
| 47476 |
+
"epoch": 0.9214312061969753,
|
| 47477 |
+
"grad_norm": 3.3945140838623047,
|
| 47478 |
+
"learning_rate": 3.731266759760854e-07,
|
| 47479 |
+
"loss": 0.1067,
|
| 47480 |
+
"step": 6245
|
| 47481 |
+
},
|
| 47482 |
+
{
|
| 47483 |
+
"epoch": 0.9215787532275913,
|
| 47484 |
+
"grad_norm": 2.3547747135162354,
|
| 47485 |
+
"learning_rate": 3.717340103893341e-07,
|
| 47486 |
+
"loss": 0.0584,
|
| 47487 |
+
"step": 6246
|
| 47488 |
+
},
|
| 47489 |
+
{
|
| 47490 |
+
"epoch": 0.9217263002582073,
|
| 47491 |
+
"grad_norm": 1.9721163511276245,
|
| 47492 |
+
"learning_rate": 3.7034389944752613e-07,
|
| 47493 |
+
"loss": 0.0601,
|
| 47494 |
+
"step": 6247
|
| 47495 |
+
},
|
| 47496 |
+
{
|
| 47497 |
+
"epoch": 0.9218738472888233,
|
| 47498 |
+
"grad_norm": 4.440569877624512,
|
| 47499 |
+
"learning_rate": 3.689563435194976e-07,
|
| 47500 |
+
"loss": 0.1317,
|
| 47501 |
+
"step": 6248
|
| 47502 |
+
},
|
| 47503 |
+
{
|
| 47504 |
+
"epoch": 0.9220213943194393,
|
| 47505 |
+
"grad_norm": 1.7474677562713623,
|
| 47506 |
+
"learning_rate": 3.6757134297340735e-07,
|
| 47507 |
+
"loss": 0.044,
|
| 47508 |
+
"step": 6249
|
| 47509 |
+
},
|
| 47510 |
+
{
|
| 47511 |
+
"epoch": 0.9221689413500553,
|
| 47512 |
+
"grad_norm": 1.8465862274169922,
|
| 47513 |
+
"learning_rate": 3.661888981767314e-07,
|
| 47514 |
+
"loss": 0.0436,
|
| 47515 |
+
"step": 6250
|
| 47516 |
+
},
|
| 47517 |
+
{
|
| 47518 |
+
"epoch": 0.9223164883806714,
|
| 47519 |
+
"grad_norm": 1.5237339735031128,
|
| 47520 |
+
"learning_rate": 3.6480900949627306e-07,
|
| 47521 |
+
"loss": 0.0412,
|
| 47522 |
+
"step": 6251
|
| 47523 |
+
},
|
| 47524 |
+
{
|
| 47525 |
+
"epoch": 0.9224640354112873,
|
| 47526 |
+
"grad_norm": 4.066259384155273,
|
| 47527 |
+
"learning_rate": 3.6343167729815164e-07,
|
| 47528 |
+
"loss": 0.0675,
|
| 47529 |
+
"step": 6252
|
| 47530 |
+
},
|
| 47531 |
+
{
|
| 47532 |
+
"epoch": 0.9226115824419033,
|
| 47533 |
+
"grad_norm": 1.155721664428711,
|
| 47534 |
+
"learning_rate": 3.6205690194781487e-07,
|
| 47535 |
+
"loss": 0.0208,
|
| 47536 |
+
"step": 6253
|
| 47537 |
+
},
|
| 47538 |
+
{
|
| 47539 |
+
"epoch": 0.9227591294725194,
|
| 47540 |
+
"grad_norm": 2.956277370452881,
|
| 47541 |
+
"learning_rate": 3.606846838100264e-07,
|
| 47542 |
+
"loss": 0.0557,
|
| 47543 |
+
"step": 6254
|
| 47544 |
+
},
|
| 47545 |
+
{
|
| 47546 |
+
"epoch": 0.9229066765031354,
|
| 47547 |
+
"grad_norm": 2.8474464416503906,
|
| 47548 |
+
"learning_rate": 3.5931502324887624e-07,
|
| 47549 |
+
"loss": 0.1175,
|
| 47550 |
+
"step": 6255
|
| 47551 |
+
},
|
| 47552 |
+
{
|
| 47553 |
+
"epoch": 0.9230542235337513,
|
| 47554 |
+
"grad_norm": 0.9943166971206665,
|
| 47555 |
+
"learning_rate": 3.579479206277692e-07,
|
| 47556 |
+
"loss": 0.0091,
|
| 47557 |
+
"step": 6256
|
| 47558 |
+
},
|
| 47559 |
+
{
|
| 47560 |
+
"epoch": 0.9232017705643674,
|
| 47561 |
+
"grad_norm": 2.0411195755004883,
|
| 47562 |
+
"learning_rate": 3.565833763094373e-07,
|
| 47563 |
+
"loss": 0.0226,
|
| 47564 |
+
"step": 6257
|
| 47565 |
+
},
|
| 47566 |
+
{
|
| 47567 |
+
"epoch": 0.9233493175949834,
|
| 47568 |
+
"grad_norm": 3.972092390060425,
|
| 47569 |
+
"learning_rate": 3.552213906559343e-07,
|
| 47570 |
+
"loss": 0.0739,
|
| 47571 |
+
"step": 6258
|
| 47572 |
+
},
|
| 47573 |
+
{
|
| 47574 |
+
"epoch": 0.9234968646255994,
|
| 47575 |
+
"grad_norm": 2.7468929290771484,
|
| 47576 |
+
"learning_rate": 3.538619640286278e-07,
|
| 47577 |
+
"loss": 0.1084,
|
| 47578 |
+
"step": 6259
|
| 47579 |
+
},
|
| 47580 |
+
{
|
| 47581 |
+
"epoch": 0.9236444116562155,
|
| 47582 |
+
"grad_norm": 3.2310478687286377,
|
| 47583 |
+
"learning_rate": 3.52505096788216e-07,
|
| 47584 |
+
"loss": 0.0505,
|
| 47585 |
+
"step": 6260
|
| 47586 |
+
},
|
| 47587 |
+
{
|
| 47588 |
+
"epoch": 0.9236444116562155,
|
| 47589 |
+
"eval_accuracy": 0.9782923299565847,
|
| 47590 |
+
"eval_f1": 0.9629629629629629,
|
| 47591 |
+
"eval_loss": 0.05572787672281265,
|
| 47592 |
+
"eval_precision": 0.9798994974874372,
|
| 47593 |
+
"eval_recall": 0.9466019417475728,
|
| 47594 |
+
"eval_runtime": 51.4965,
|
| 47595 |
+
"eval_samples_per_second": 5.651,
|
| 47596 |
+
"eval_steps_per_second": 0.194,
|
| 47597 |
+
"step": 6260
|
| 47598 |
+
},
|
| 47599 |
+
{
|
| 47600 |
+
"epoch": 0.9237919586868314,
|
| 47601 |
+
"grad_norm": 2.4628522396087646,
|
| 47602 |
+
"learning_rate": 3.5115078929470856e-07,
|
| 47603 |
+
"loss": 0.1245,
|
| 47604 |
+
"step": 6261
|
| 47605 |
+
},
|
| 47606 |
+
{
|
| 47607 |
+
"epoch": 0.9239395057174474,
|
| 47608 |
+
"grad_norm": 2.1519012451171875,
|
| 47609 |
+
"learning_rate": 3.4979904190744486e-07,
|
| 47610 |
+
"loss": 0.0936,
|
| 47611 |
+
"step": 6262
|
| 47612 |
+
},
|
| 47613 |
+
{
|
| 47614 |
+
"epoch": 0.9240870527480635,
|
| 47615 |
+
"grad_norm": 1.2964609861373901,
|
| 47616 |
+
"learning_rate": 3.48449854985079e-07,
|
| 47617 |
+
"loss": 0.0132,
|
| 47618 |
+
"step": 6263
|
| 47619 |
+
},
|
| 47620 |
+
{
|
| 47621 |
+
"epoch": 0.9242345997786795,
|
| 47622 |
+
"grad_norm": 1.963150143623352,
|
| 47623 |
+
"learning_rate": 3.471032288855869e-07,
|
| 47624 |
+
"loss": 0.0431,
|
| 47625 |
+
"step": 6264
|
| 47626 |
+
},
|
| 47627 |
+
{
|
| 47628 |
+
"epoch": 0.9243821468092954,
|
| 47629 |
+
"grad_norm": 2.247939109802246,
|
| 47630 |
+
"learning_rate": 3.457591639662672e-07,
|
| 47631 |
+
"loss": 0.0297,
|
| 47632 |
+
"step": 6265
|
| 47633 |
+
},
|
| 47634 |
+
{
|
| 47635 |
+
"epoch": 0.9245296938399115,
|
| 47636 |
+
"grad_norm": 1.91328763961792,
|
| 47637 |
+
"learning_rate": 3.444176605837368e-07,
|
| 47638 |
+
"loss": 0.0626,
|
| 47639 |
+
"step": 6266
|
| 47640 |
+
},
|
| 47641 |
+
{
|
| 47642 |
+
"epoch": 0.9246772408705275,
|
| 47643 |
+
"grad_norm": 2.5009827613830566,
|
| 47644 |
+
"learning_rate": 3.430787190939322e-07,
|
| 47645 |
+
"loss": 0.0545,
|
| 47646 |
+
"step": 6267
|
| 47647 |
+
},
|
| 47648 |
+
{
|
| 47649 |
+
"epoch": 0.9248247879011435,
|
| 47650 |
+
"grad_norm": 0.9863361120223999,
|
| 47651 |
+
"learning_rate": 3.4174233985211467e-07,
|
| 47652 |
+
"loss": 0.0193,
|
| 47653 |
+
"step": 6268
|
| 47654 |
+
},
|
| 47655 |
+
{
|
| 47656 |
+
"epoch": 0.9249723349317595,
|
| 47657 |
+
"grad_norm": 1.2787401676177979,
|
| 47658 |
+
"learning_rate": 3.4040852321285954e-07,
|
| 47659 |
+
"loss": 0.0104,
|
| 47660 |
+
"step": 6269
|
| 47661 |
+
},
|
| 47662 |
+
{
|
| 47663 |
+
"epoch": 0.9251198819623755,
|
| 47664 |
+
"grad_norm": 2.8463070392608643,
|
| 47665 |
+
"learning_rate": 3.39077269530067e-07,
|
| 47666 |
+
"loss": 0.0565,
|
| 47667 |
+
"step": 6270
|
| 47668 |
+
},
|
| 47669 |
+
{
|
| 47670 |
+
"epoch": 0.9252674289929915,
|
| 47671 |
+
"grad_norm": 2.008657217025757,
|
| 47672 |
+
"learning_rate": 3.3774857915695346e-07,
|
| 47673 |
+
"loss": 0.0718,
|
| 47674 |
+
"step": 6271
|
| 47675 |
+
},
|
| 47676 |
+
{
|
| 47677 |
+
"epoch": 0.9254149760236076,
|
| 47678 |
+
"grad_norm": 0.9314476251602173,
|
| 47679 |
+
"learning_rate": 3.364224524460602e-07,
|
| 47680 |
+
"loss": 0.0236,
|
| 47681 |
+
"step": 6272
|
| 47682 |
+
},
|
| 47683 |
+
{
|
| 47684 |
+
"epoch": 0.9255625230542235,
|
| 47685 |
+
"grad_norm": 2.7881734371185303,
|
| 47686 |
+
"learning_rate": 3.3509888974924243e-07,
|
| 47687 |
+
"loss": 0.0565,
|
| 47688 |
+
"step": 6273
|
| 47689 |
+
},
|
| 47690 |
+
{
|
| 47691 |
+
"epoch": 0.9257100700848395,
|
| 47692 |
+
"grad_norm": 3.7052223682403564,
|
| 47693 |
+
"learning_rate": 3.3377789141768035e-07,
|
| 47694 |
+
"loss": 0.0528,
|
| 47695 |
+
"step": 6274
|
| 47696 |
+
},
|
| 47697 |
+
{
|
| 47698 |
+
"epoch": 0.9258576171154556,
|
| 47699 |
+
"grad_norm": 1.0346524715423584,
|
| 47700 |
+
"learning_rate": 3.324594578018681e-07,
|
| 47701 |
+
"loss": 0.0221,
|
| 47702 |
+
"step": 6275
|
| 47703 |
+
},
|
| 47704 |
+
{
|
| 47705 |
+
"epoch": 0.9260051641460716,
|
| 47706 |
+
"grad_norm": 1.8550926446914673,
|
| 47707 |
+
"learning_rate": 3.3114358925162573e-07,
|
| 47708 |
+
"loss": 0.0368,
|
| 47709 |
+
"step": 6276
|
| 47710 |
+
},
|
| 47711 |
+
{
|
| 47712 |
+
"epoch": 0.9261527111766875,
|
| 47713 |
+
"grad_norm": 4.289306163787842,
|
| 47714 |
+
"learning_rate": 3.298302861160885e-07,
|
| 47715 |
+
"loss": 0.0772,
|
| 47716 |
+
"step": 6277
|
| 47717 |
+
},
|
| 47718 |
+
{
|
| 47719 |
+
"epoch": 0.9263002582073036,
|
| 47720 |
+
"grad_norm": 3.7169032096862793,
|
| 47721 |
+
"learning_rate": 3.2851954874371095e-07,
|
| 47722 |
+
"loss": 0.116,
|
| 47723 |
+
"step": 6278
|
| 47724 |
+
},
|
| 47725 |
+
{
|
| 47726 |
+
"epoch": 0.9264478052379196,
|
| 47727 |
+
"grad_norm": 1.6580818891525269,
|
| 47728 |
+
"learning_rate": 3.272113774822694e-07,
|
| 47729 |
+
"loss": 0.0495,
|
| 47730 |
+
"step": 6279
|
| 47731 |
+
},
|
| 47732 |
+
{
|
| 47733 |
+
"epoch": 0.9265953522685356,
|
| 47734 |
+
"grad_norm": 2.004760980606079,
|
| 47735 |
+
"learning_rate": 3.2590577267885726e-07,
|
| 47736 |
+
"loss": 0.0424,
|
| 47737 |
+
"step": 6280
|
| 47738 |
+
},
|
| 47739 |
+
{
|
| 47740 |
+
"epoch": 0.9265953522685356,
|
| 47741 |
+
"eval_accuracy": 0.9797395079594791,
|
| 47742 |
+
"eval_f1": 0.9653465346534653,
|
| 47743 |
+
"eval_loss": 0.05522174760699272,
|
| 47744 |
+
"eval_precision": 0.9848484848484849,
|
| 47745 |
+
"eval_recall": 0.9466019417475728,
|
| 47746 |
+
"eval_runtime": 52.1889,
|
| 47747 |
+
"eval_samples_per_second": 5.576,
|
| 47748 |
+
"eval_steps_per_second": 0.192,
|
| 47749 |
+
"step": 6280
|
| 47750 |
+
},
|
| 47751 |
+
{
|
| 47752 |
+
"epoch": 0.9267428992991517,
|
| 47753 |
+
"grad_norm": 2.8334689140319824,
|
| 47754 |
+
"learning_rate": 3.2460273467988635e-07,
|
| 47755 |
+
"loss": 0.0398,
|
| 47756 |
+
"step": 6281
|
| 47757 |
+
},
|
| 47758 |
+
{
|
| 47759 |
+
"epoch": 0.9268904463297676,
|
| 47760 |
+
"grad_norm": 0.9800840616226196,
|
| 47761 |
+
"learning_rate": 3.233022638310901e-07,
|
| 47762 |
+
"loss": 0.0199,
|
| 47763 |
+
"step": 6282
|
| 47764 |
+
},
|
| 47765 |
+
{
|
| 47766 |
+
"epoch": 0.9270379933603836,
|
| 47767 |
+
"grad_norm": 1.883017897605896,
|
| 47768 |
+
"learning_rate": 3.2200436047752026e-07,
|
| 47769 |
+
"loss": 0.0243,
|
| 47770 |
+
"step": 6283
|
| 47771 |
+
},
|
| 47772 |
+
{
|
| 47773 |
+
"epoch": 0.9271855403909997,
|
| 47774 |
+
"grad_norm": 3.470026731491089,
|
| 47775 |
+
"learning_rate": 3.207090249635436e-07,
|
| 47776 |
+
"loss": 0.0418,
|
| 47777 |
+
"step": 6284
|
| 47778 |
+
},
|
| 47779 |
+
{
|
| 47780 |
+
"epoch": 0.9273330874216157,
|
| 47781 |
+
"grad_norm": 3.9536657333374023,
|
| 47782 |
+
"learning_rate": 3.194162576328508e-07,
|
| 47783 |
+
"loss": 0.1209,
|
| 47784 |
+
"step": 6285
|
| 47785 |
+
},
|
| 47786 |
+
{
|
| 47787 |
+
"epoch": 0.9274806344522316,
|
| 47788 |
+
"grad_norm": 1.0981996059417725,
|
| 47789 |
+
"learning_rate": 3.181260588284485e-07,
|
| 47790 |
+
"loss": 0.0188,
|
| 47791 |
+
"step": 6286
|
| 47792 |
+
},
|
| 47793 |
+
{
|
| 47794 |
+
"epoch": 0.9276281814828476,
|
| 47795 |
+
"grad_norm": 13.924962997436523,
|
| 47796 |
+
"learning_rate": 3.168384288926596e-07,
|
| 47797 |
+
"loss": 0.1309,
|
| 47798 |
+
"step": 6287
|
| 47799 |
+
},
|
| 47800 |
+
{
|
| 47801 |
+
"epoch": 0.9277757285134637,
|
| 47802 |
+
"grad_norm": 1.7328006029129028,
|
| 47803 |
+
"learning_rate": 3.155533681671319e-07,
|
| 47804 |
+
"loss": 0.0376,
|
| 47805 |
+
"step": 6288
|
| 47806 |
+
},
|
| 47807 |
+
{
|
| 47808 |
+
"epoch": 0.9279232755440797,
|
| 47809 |
+
"grad_norm": 7.636415481567383,
|
| 47810 |
+
"learning_rate": 3.1427087699282375e-07,
|
| 47811 |
+
"loss": 0.0774,
|
| 47812 |
+
"step": 6289
|
| 47813 |
+
},
|
| 47814 |
+
{
|
| 47815 |
+
"epoch": 0.9280708225746956,
|
| 47816 |
+
"grad_norm": 2.2514214515686035,
|
| 47817 |
+
"learning_rate": 3.1299095571001745e-07,
|
| 47818 |
+
"loss": 0.1002,
|
| 47819 |
+
"step": 6290
|
| 47820 |
+
},
|
| 47821 |
+
{
|
| 47822 |
+
"epoch": 0.9282183696053117,
|
| 47823 |
+
"grad_norm": 2.580007314682007,
|
| 47824 |
+
"learning_rate": 3.1171360465831245e-07,
|
| 47825 |
+
"loss": 0.066,
|
| 47826 |
+
"step": 6291
|
| 47827 |
+
},
|
| 47828 |
+
{
|
| 47829 |
+
"epoch": 0.9283659166359277,
|
| 47830 |
+
"grad_norm": 4.111058712005615,
|
| 47831 |
+
"learning_rate": 3.104388241766232e-07,
|
| 47832 |
+
"loss": 0.1297,
|
| 47833 |
+
"step": 6292
|
| 47834 |
+
},
|
| 47835 |
+
{
|
| 47836 |
+
"epoch": 0.9285134636665437,
|
| 47837 |
+
"grad_norm": 2.8205816745758057,
|
| 47838 |
+
"learning_rate": 3.091666146031858e-07,
|
| 47839 |
+
"loss": 0.0468,
|
| 47840 |
+
"step": 6293
|
| 47841 |
+
},
|
| 47842 |
+
{
|
| 47843 |
+
"epoch": 0.9286610106971597,
|
| 47844 |
+
"grad_norm": 1.6066216230392456,
|
| 47845 |
+
"learning_rate": 3.0789697627555124e-07,
|
| 47846 |
+
"loss": 0.0386,
|
| 47847 |
+
"step": 6294
|
| 47848 |
+
},
|
| 47849 |
+
{
|
| 47850 |
+
"epoch": 0.9288085577277757,
|
| 47851 |
+
"grad_norm": 2.5085225105285645,
|
| 47852 |
+
"learning_rate": 3.0662990953058803e-07,
|
| 47853 |
+
"loss": 0.0928,
|
| 47854 |
+
"step": 6295
|
| 47855 |
+
},
|
| 47856 |
+
{
|
| 47857 |
+
"epoch": 0.9289561047583917,
|
| 47858 |
+
"grad_norm": 1.2958418130874634,
|
| 47859 |
+
"learning_rate": 3.0536541470448824e-07,
|
| 47860 |
+
"loss": 0.0261,
|
| 47861 |
+
"step": 6296
|
| 47862 |
+
},
|
| 47863 |
+
{
|
| 47864 |
+
"epoch": 0.9291036517890078,
|
| 47865 |
+
"grad_norm": 1.3905576467514038,
|
| 47866 |
+
"learning_rate": 3.041034921327557e-07,
|
| 47867 |
+
"loss": 0.0206,
|
| 47868 |
+
"step": 6297
|
| 47869 |
+
},
|
| 47870 |
+
{
|
| 47871 |
+
"epoch": 0.9292511988196237,
|
| 47872 |
+
"grad_norm": 2.7028305530548096,
|
| 47873 |
+
"learning_rate": 3.028441421502115e-07,
|
| 47874 |
+
"loss": 0.0687,
|
| 47875 |
+
"step": 6298
|
| 47876 |
+
},
|
| 47877 |
+
{
|
| 47878 |
+
"epoch": 0.9293987458502397,
|
| 47879 |
+
"grad_norm": 2.6002049446105957,
|
| 47880 |
+
"learning_rate": 3.015873650909984e-07,
|
| 47881 |
+
"loss": 0.0873,
|
| 47882 |
+
"step": 6299
|
| 47883 |
+
},
|
| 47884 |
+
{
|
| 47885 |
+
"epoch": 0.9295462928808558,
|
| 47886 |
+
"grad_norm": 2.208272695541382,
|
| 47887 |
+
"learning_rate": 3.003331612885718e-07,
|
| 47888 |
+
"loss": 0.0506,
|
| 47889 |
+
"step": 6300
|
| 47890 |
+
},
|
| 47891 |
+
{
|
| 47892 |
+
"epoch": 0.9295462928808558,
|
| 47893 |
+
"eval_accuracy": 0.9782923299565847,
|
| 47894 |
+
"eval_f1": 0.9629629629629629,
|
| 47895 |
+
"eval_loss": 0.05601061135530472,
|
| 47896 |
+
"eval_precision": 0.9798994974874372,
|
| 47897 |
+
"eval_recall": 0.9466019417475728,
|
| 47898 |
+
"eval_runtime": 50.1847,
|
| 47899 |
+
"eval_samples_per_second": 5.799,
|
| 47900 |
+
"eval_steps_per_second": 0.199,
|
| 47901 |
+
"step": 6300
|
| 47902 |
}
|
| 47903 |
],
|
| 47904 |
"logging_steps": 1,
|
|
|
|
| 47918 |
"attributes": {}
|
| 47919 |
}
|
| 47920 |
},
|
| 47921 |
+
"total_flos": 1.9408628309913764e+18,
|
| 47922 |
"train_batch_size": 8,
|
| 47923 |
"trial_name": null,
|
| 47924 |
"trial_params": null
|