Training in progress, step 6100, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13934748
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4558b733d32e90c4e5c89bcba7e81f8b773afc6aa52a225d4a1952b193271193
|
| 3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13999412
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:481608ef601eeee9cd85ec29231d62de3814d11712fe3bb63383faaa39db9e5b
|
| 3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:596f270fa924dc50f57e12f2747dd1d30dfc07fc2ee00e143030c1b9a7de0239
|
| 3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9be32303f0039603765d77ac706bef56128491b375b7cab5a7ca9e2dd0c20e1
|
| 3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1dc871b1d1595e1e47cbc3a3462b01da1390680ed602cc4977fcc0ae598b0ab
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7d82623ea7825bea9aa6e58232cb5ab536747b4e2584fee539f8ebb85840589
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8827dca82cdf8c9dc0048ecc8da1ac0c4a5995aa9c070303bd1e4628bd21c2b1
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6644c5e24b376442f37af7277f310848ba0091903a3e17bb78348c667f27d6a
|
| 3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6aa8e8f4afb4ad3590db680bccacca81a9fea479e638f91fd5eb34e67e733103
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2039e0dd851cf50efc5c92eae55ef9d90644f479d007e1a04912e5dfe8b441a
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:704157ddb23baa7ea252d705881891eb9017ede4c98afdcc2fe424b1da003854
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f75f460626823b08c0b5d748bd6e356df4fad31b4d6f1bee0ea68d6dd231541
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ea03a23b5e2bdcb4bd9a8db175e30d4861f4d46b3e4ebdc845dc49850878e7a
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -45619,6 +45619,766 @@
|
|
| 45619 |
"eval_samples_per_second": 5.864,
|
| 45620 |
"eval_steps_per_second": 0.201,
|
| 45621 |
"step": 6000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45622 |
}
|
| 45623 |
],
|
| 45624 |
"logging_steps": 1,
|
|
@@ -45638,7 +46398,7 @@
|
|
| 45638 |
"attributes": {}
|
| 45639 |
}
|
| 45640 |
},
|
| 45641 |
-
"total_flos": 1.
|
| 45642 |
"train_batch_size": 8,
|
| 45643 |
"trial_name": null,
|
| 45644 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.900036886757654,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 6100,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 45619 |
"eval_samples_per_second": 5.864,
|
| 45620 |
"eval_steps_per_second": 0.201,
|
| 45621 |
"step": 6000
|
| 45622 |
+
},
|
| 45623 |
+
{
|
| 45624 |
+
"epoch": 0.8854297307266691,
|
| 45625 |
+
"grad_norm": 1.9520748853683472,
|
| 45626 |
+
"learning_rate": 7.882895975685912e-07,
|
| 45627 |
+
"loss": 0.039,
|
| 45628 |
+
"step": 6001
|
| 45629 |
+
},
|
| 45630 |
+
{
|
| 45631 |
+
"epoch": 0.8855772777572851,
|
| 45632 |
+
"grad_norm": 1.3660613298416138,
|
| 45633 |
+
"learning_rate": 7.862862685205296e-07,
|
| 45634 |
+
"loss": 0.0214,
|
| 45635 |
+
"step": 6002
|
| 45636 |
+
},
|
| 45637 |
+
{
|
| 45638 |
+
"epoch": 0.8857248247879012,
|
| 45639 |
+
"grad_norm": 3.9320688247680664,
|
| 45640 |
+
"learning_rate": 7.842853841252463e-07,
|
| 45641 |
+
"loss": 0.0278,
|
| 45642 |
+
"step": 6003
|
| 45643 |
+
},
|
| 45644 |
+
{
|
| 45645 |
+
"epoch": 0.8858723718185172,
|
| 45646 |
+
"grad_norm": 1.4639867544174194,
|
| 45647 |
+
"learning_rate": 7.822869449136328e-07,
|
| 45648 |
+
"loss": 0.0421,
|
| 45649 |
+
"step": 6004
|
| 45650 |
+
},
|
| 45651 |
+
{
|
| 45652 |
+
"epoch": 0.8860199188491331,
|
| 45653 |
+
"grad_norm": 3.7693629264831543,
|
| 45654 |
+
"learning_rate": 7.802909514159285e-07,
|
| 45655 |
+
"loss": 0.091,
|
| 45656 |
+
"step": 6005
|
| 45657 |
+
},
|
| 45658 |
+
{
|
| 45659 |
+
"epoch": 0.8861674658797491,
|
| 45660 |
+
"grad_norm": 1.5250922441482544,
|
| 45661 |
+
"learning_rate": 7.782974041617253e-07,
|
| 45662 |
+
"loss": 0.0195,
|
| 45663 |
+
"step": 6006
|
| 45664 |
+
},
|
| 45665 |
+
{
|
| 45666 |
+
"epoch": 0.8863150129103652,
|
| 45667 |
+
"grad_norm": 3.0657765865325928,
|
| 45668 |
+
"learning_rate": 7.763063036799701e-07,
|
| 45669 |
+
"loss": 0.0338,
|
| 45670 |
+
"step": 6007
|
| 45671 |
+
},
|
| 45672 |
+
{
|
| 45673 |
+
"epoch": 0.8864625599409812,
|
| 45674 |
+
"grad_norm": 1.9614242315292358,
|
| 45675 |
+
"learning_rate": 7.743176504989513e-07,
|
| 45676 |
+
"loss": 0.0447,
|
| 45677 |
+
"step": 6008
|
| 45678 |
+
},
|
| 45679 |
+
{
|
| 45680 |
+
"epoch": 0.8866101069715971,
|
| 45681 |
+
"grad_norm": 3.2453866004943848,
|
| 45682 |
+
"learning_rate": 7.723314451463193e-07,
|
| 45683 |
+
"loss": 0.137,
|
| 45684 |
+
"step": 6009
|
| 45685 |
+
},
|
| 45686 |
+
{
|
| 45687 |
+
"epoch": 0.8867576540022132,
|
| 45688 |
+
"grad_norm": 2.51401424407959,
|
| 45689 |
+
"learning_rate": 7.703476881490634e-07,
|
| 45690 |
+
"loss": 0.098,
|
| 45691 |
+
"step": 6010
|
| 45692 |
+
},
|
| 45693 |
+
{
|
| 45694 |
+
"epoch": 0.8869052010328292,
|
| 45695 |
+
"grad_norm": 3.9559733867645264,
|
| 45696 |
+
"learning_rate": 7.683663800335328e-07,
|
| 45697 |
+
"loss": 0.0941,
|
| 45698 |
+
"step": 6011
|
| 45699 |
+
},
|
| 45700 |
+
{
|
| 45701 |
+
"epoch": 0.8870527480634453,
|
| 45702 |
+
"grad_norm": 1.9767736196517944,
|
| 45703 |
+
"learning_rate": 7.663875213254246e-07,
|
| 45704 |
+
"loss": 0.0472,
|
| 45705 |
+
"step": 6012
|
| 45706 |
+
},
|
| 45707 |
+
{
|
| 45708 |
+
"epoch": 0.8872002950940613,
|
| 45709 |
+
"grad_norm": 1.6465672254562378,
|
| 45710 |
+
"learning_rate": 7.644111125497822e-07,
|
| 45711 |
+
"loss": 0.0145,
|
| 45712 |
+
"step": 6013
|
| 45713 |
+
},
|
| 45714 |
+
{
|
| 45715 |
+
"epoch": 0.8873478421246772,
|
| 45716 |
+
"grad_norm": 1.3866339921951294,
|
| 45717 |
+
"learning_rate": 7.624371542310005e-07,
|
| 45718 |
+
"loss": 0.0386,
|
| 45719 |
+
"step": 6014
|
| 45720 |
+
},
|
| 45721 |
+
{
|
| 45722 |
+
"epoch": 0.8874953891552932,
|
| 45723 |
+
"grad_norm": 2.0481443405151367,
|
| 45724 |
+
"learning_rate": 7.604656468928262e-07,
|
| 45725 |
+
"loss": 0.0383,
|
| 45726 |
+
"step": 6015
|
| 45727 |
+
},
|
| 45728 |
+
{
|
| 45729 |
+
"epoch": 0.8876429361859093,
|
| 45730 |
+
"grad_norm": 3.9279582500457764,
|
| 45731 |
+
"learning_rate": 7.584965910583564e-07,
|
| 45732 |
+
"loss": 0.0489,
|
| 45733 |
+
"step": 6016
|
| 45734 |
+
},
|
| 45735 |
+
{
|
| 45736 |
+
"epoch": 0.8877904832165253,
|
| 45737 |
+
"grad_norm": 1.7083287239074707,
|
| 45738 |
+
"learning_rate": 7.565299872500331e-07,
|
| 45739 |
+
"loss": 0.0244,
|
| 45740 |
+
"step": 6017
|
| 45741 |
+
},
|
| 45742 |
+
{
|
| 45743 |
+
"epoch": 0.8879380302471412,
|
| 45744 |
+
"grad_norm": 1.2823542356491089,
|
| 45745 |
+
"learning_rate": 7.545658359896547e-07,
|
| 45746 |
+
"loss": 0.0164,
|
| 45747 |
+
"step": 6018
|
| 45748 |
+
},
|
| 45749 |
+
{
|
| 45750 |
+
"epoch": 0.8880855772777573,
|
| 45751 |
+
"grad_norm": 0.9202921390533447,
|
| 45752 |
+
"learning_rate": 7.526041377983596e-07,
|
| 45753 |
+
"loss": 0.0263,
|
| 45754 |
+
"step": 6019
|
| 45755 |
+
},
|
| 45756 |
+
{
|
| 45757 |
+
"epoch": 0.8882331243083733,
|
| 45758 |
+
"grad_norm": 1.0891423225402832,
|
| 45759 |
+
"learning_rate": 7.506448931966436e-07,
|
| 45760 |
+
"loss": 0.0291,
|
| 45761 |
+
"step": 6020
|
| 45762 |
+
},
|
| 45763 |
+
{
|
| 45764 |
+
"epoch": 0.8882331243083733,
|
| 45765 |
+
"eval_accuracy": 0.9782923299565847,
|
| 45766 |
+
"eval_f1": 0.9629629629629629,
|
| 45767 |
+
"eval_loss": 0.05567174404859543,
|
| 45768 |
+
"eval_precision": 0.9798994974874372,
|
| 45769 |
+
"eval_recall": 0.9466019417475728,
|
| 45770 |
+
"eval_runtime": 48.4257,
|
| 45771 |
+
"eval_samples_per_second": 6.009,
|
| 45772 |
+
"eval_steps_per_second": 0.207,
|
| 45773 |
+
"step": 6020
|
| 45774 |
+
},
|
| 45775 |
+
{
|
| 45776 |
+
"epoch": 0.8883806713389893,
|
| 45777 |
+
"grad_norm": 1.6009353399276733,
|
| 45778 |
+
"learning_rate": 7.486881027043491e-07,
|
| 45779 |
+
"loss": 0.0527,
|
| 45780 |
+
"step": 6021
|
| 45781 |
+
},
|
| 45782 |
+
{
|
| 45783 |
+
"epoch": 0.8885282183696053,
|
| 45784 |
+
"grad_norm": 6.035427570343018,
|
| 45785 |
+
"learning_rate": 7.467337668406638e-07,
|
| 45786 |
+
"loss": 0.0817,
|
| 45787 |
+
"step": 6022
|
| 45788 |
+
},
|
| 45789 |
+
{
|
| 45790 |
+
"epoch": 0.8886757654002213,
|
| 45791 |
+
"grad_norm": 1.1803969144821167,
|
| 45792 |
+
"learning_rate": 7.447818861241308e-07,
|
| 45793 |
+
"loss": 0.0216,
|
| 45794 |
+
"step": 6023
|
| 45795 |
+
},
|
| 45796 |
+
{
|
| 45797 |
+
"epoch": 0.8888233124308373,
|
| 45798 |
+
"grad_norm": 2.7183499336242676,
|
| 45799 |
+
"learning_rate": 7.428324610726345e-07,
|
| 45800 |
+
"loss": 0.0815,
|
| 45801 |
+
"step": 6024
|
| 45802 |
+
},
|
| 45803 |
+
{
|
| 45804 |
+
"epoch": 0.8889708594614534,
|
| 45805 |
+
"grad_norm": 1.5589689016342163,
|
| 45806 |
+
"learning_rate": 7.408854922034126e-07,
|
| 45807 |
+
"loss": 0.0311,
|
| 45808 |
+
"step": 6025
|
| 45809 |
+
},
|
| 45810 |
+
{
|
| 45811 |
+
"epoch": 0.8891184064920693,
|
| 45812 |
+
"grad_norm": 3.315922498703003,
|
| 45813 |
+
"learning_rate": 7.389409800330516e-07,
|
| 45814 |
+
"loss": 0.0731,
|
| 45815 |
+
"step": 6026
|
| 45816 |
+
},
|
| 45817 |
+
{
|
| 45818 |
+
"epoch": 0.8892659535226853,
|
| 45819 |
+
"grad_norm": 1.9708974361419678,
|
| 45820 |
+
"learning_rate": 7.369989250774812e-07,
|
| 45821 |
+
"loss": 0.0703,
|
| 45822 |
+
"step": 6027
|
| 45823 |
+
},
|
| 45824 |
+
{
|
| 45825 |
+
"epoch": 0.8894135005533014,
|
| 45826 |
+
"grad_norm": 1.7425222396850586,
|
| 45827 |
+
"learning_rate": 7.350593278519824e-07,
|
| 45828 |
+
"loss": 0.0496,
|
| 45829 |
+
"step": 6028
|
| 45830 |
+
},
|
| 45831 |
+
{
|
| 45832 |
+
"epoch": 0.8895610475839174,
|
| 45833 |
+
"grad_norm": 1.3247371912002563,
|
| 45834 |
+
"learning_rate": 7.331221888711859e-07,
|
| 45835 |
+
"loss": 0.0324,
|
| 45836 |
+
"step": 6029
|
| 45837 |
+
},
|
| 45838 |
+
{
|
| 45839 |
+
"epoch": 0.8897085946145333,
|
| 45840 |
+
"grad_norm": 1.3036816120147705,
|
| 45841 |
+
"learning_rate": 7.311875086490683e-07,
|
| 45842 |
+
"loss": 0.033,
|
| 45843 |
+
"step": 6030
|
| 45844 |
+
},
|
| 45845 |
+
{
|
| 45846 |
+
"epoch": 0.8898561416451494,
|
| 45847 |
+
"grad_norm": 2.608323097229004,
|
| 45848 |
+
"learning_rate": 7.292552876989511e-07,
|
| 45849 |
+
"loss": 0.0983,
|
| 45850 |
+
"step": 6031
|
| 45851 |
+
},
|
| 45852 |
+
{
|
| 45853 |
+
"epoch": 0.8900036886757654,
|
| 45854 |
+
"grad_norm": 2.148296356201172,
|
| 45855 |
+
"learning_rate": 7.273255265335088e-07,
|
| 45856 |
+
"loss": 0.0705,
|
| 45857 |
+
"step": 6032
|
| 45858 |
+
},
|
| 45859 |
+
{
|
| 45860 |
+
"epoch": 0.8901512357063814,
|
| 45861 |
+
"grad_norm": 2.8998477458953857,
|
| 45862 |
+
"learning_rate": 7.253982256647574e-07,
|
| 45863 |
+
"loss": 0.0512,
|
| 45864 |
+
"step": 6033
|
| 45865 |
+
},
|
| 45866 |
+
{
|
| 45867 |
+
"epoch": 0.8902987827369974,
|
| 45868 |
+
"grad_norm": 3.0811049938201904,
|
| 45869 |
+
"learning_rate": 7.234733856040654e-07,
|
| 45870 |
+
"loss": 0.1346,
|
| 45871 |
+
"step": 6034
|
| 45872 |
+
},
|
| 45873 |
+
{
|
| 45874 |
+
"epoch": 0.8904463297676134,
|
| 45875 |
+
"grad_norm": 2.189905881881714,
|
| 45876 |
+
"learning_rate": 7.215510068621467e-07,
|
| 45877 |
+
"loss": 0.0503,
|
| 45878 |
+
"step": 6035
|
| 45879 |
+
},
|
| 45880 |
+
{
|
| 45881 |
+
"epoch": 0.8905938767982294,
|
| 45882 |
+
"grad_norm": 3.884209394454956,
|
| 45883 |
+
"learning_rate": 7.196310899490577e-07,
|
| 45884 |
+
"loss": 0.0547,
|
| 45885 |
+
"step": 6036
|
| 45886 |
+
},
|
| 45887 |
+
{
|
| 45888 |
+
"epoch": 0.8907414238288455,
|
| 45889 |
+
"grad_norm": 1.5360444784164429,
|
| 45890 |
+
"learning_rate": 7.177136353742098e-07,
|
| 45891 |
+
"loss": 0.0523,
|
| 45892 |
+
"step": 6037
|
| 45893 |
+
},
|
| 45894 |
+
{
|
| 45895 |
+
"epoch": 0.8908889708594615,
|
| 45896 |
+
"grad_norm": 1.0694087743759155,
|
| 45897 |
+
"learning_rate": 7.157986436463537e-07,
|
| 45898 |
+
"loss": 0.022,
|
| 45899 |
+
"step": 6038
|
| 45900 |
+
},
|
| 45901 |
+
{
|
| 45902 |
+
"epoch": 0.8910365178900774,
|
| 45903 |
+
"grad_norm": 1.7816051244735718,
|
| 45904 |
+
"learning_rate": 7.138861152735898e-07,
|
| 45905 |
+
"loss": 0.0618,
|
| 45906 |
+
"step": 6039
|
| 45907 |
+
},
|
| 45908 |
+
{
|
| 45909 |
+
"epoch": 0.8911840649206935,
|
| 45910 |
+
"grad_norm": 2.7336528301239014,
|
| 45911 |
+
"learning_rate": 7.119760507633678e-07,
|
| 45912 |
+
"loss": 0.0872,
|
| 45913 |
+
"step": 6040
|
| 45914 |
+
},
|
| 45915 |
+
{
|
| 45916 |
+
"epoch": 0.8911840649206935,
|
| 45917 |
+
"eval_accuracy": 0.9782923299565847,
|
| 45918 |
+
"eval_f1": 0.9629629629629629,
|
| 45919 |
+
"eval_loss": 0.05596858263015747,
|
| 45920 |
+
"eval_precision": 0.9798994974874372,
|
| 45921 |
+
"eval_recall": 0.9466019417475728,
|
| 45922 |
+
"eval_runtime": 48.9314,
|
| 45923 |
+
"eval_samples_per_second": 5.947,
|
| 45924 |
+
"eval_steps_per_second": 0.204,
|
| 45925 |
+
"step": 6040
|
| 45926 |
+
},
|
| 45927 |
+
{
|
| 45928 |
+
"epoch": 0.8913316119513095,
|
| 45929 |
+
"grad_norm": 1.510573148727417,
|
| 45930 |
+
"learning_rate": 7.100684506224775e-07,
|
| 45931 |
+
"loss": 0.0494,
|
| 45932 |
+
"step": 6041
|
| 45933 |
+
},
|
| 45934 |
+
{
|
| 45935 |
+
"epoch": 0.8914791589819255,
|
| 45936 |
+
"grad_norm": 4.796574115753174,
|
| 45937 |
+
"learning_rate": 7.081633153570577e-07,
|
| 45938 |
+
"loss": 0.0196,
|
| 45939 |
+
"step": 6042
|
| 45940 |
+
},
|
| 45941 |
+
{
|
| 45942 |
+
"epoch": 0.8916267060125415,
|
| 45943 |
+
"grad_norm": 1.6497973203659058,
|
| 45944 |
+
"learning_rate": 7.06260645472594e-07,
|
| 45945 |
+
"loss": 0.0334,
|
| 45946 |
+
"step": 6043
|
| 45947 |
+
},
|
| 45948 |
+
{
|
| 45949 |
+
"epoch": 0.8917742530431575,
|
| 45950 |
+
"grad_norm": 1.3166993856430054,
|
| 45951 |
+
"learning_rate": 7.043604414739203e-07,
|
| 45952 |
+
"loss": 0.0214,
|
| 45953 |
+
"step": 6044
|
| 45954 |
+
},
|
| 45955 |
+
{
|
| 45956 |
+
"epoch": 0.8919218000737735,
|
| 45957 |
+
"grad_norm": 3.4348347187042236,
|
| 45958 |
+
"learning_rate": 7.024627038652077e-07,
|
| 45959 |
+
"loss": 0.0721,
|
| 45960 |
+
"step": 6045
|
| 45961 |
+
},
|
| 45962 |
+
{
|
| 45963 |
+
"epoch": 0.8920693471043896,
|
| 45964 |
+
"grad_norm": 2.7059199810028076,
|
| 45965 |
+
"learning_rate": 7.005674331499834e-07,
|
| 45966 |
+
"loss": 0.0711,
|
| 45967 |
+
"step": 6046
|
| 45968 |
+
},
|
| 45969 |
+
{
|
| 45970 |
+
"epoch": 0.8922168941350055,
|
| 45971 |
+
"grad_norm": 2.4796156883239746,
|
| 45972 |
+
"learning_rate": 6.98674629831112e-07,
|
| 45973 |
+
"loss": 0.0405,
|
| 45974 |
+
"step": 6047
|
| 45975 |
+
},
|
| 45976 |
+
{
|
| 45977 |
+
"epoch": 0.8923644411656215,
|
| 45978 |
+
"grad_norm": 5.228332042694092,
|
| 45979 |
+
"learning_rate": 6.96784294410806e-07,
|
| 45980 |
+
"loss": 0.1015,
|
| 45981 |
+
"step": 6048
|
| 45982 |
+
},
|
| 45983 |
+
{
|
| 45984 |
+
"epoch": 0.8925119881962376,
|
| 45985 |
+
"grad_norm": 2.4152002334594727,
|
| 45986 |
+
"learning_rate": 6.948964273906278e-07,
|
| 45987 |
+
"loss": 0.0533,
|
| 45988 |
+
"step": 6049
|
| 45989 |
+
},
|
| 45990 |
+
{
|
| 45991 |
+
"epoch": 0.8926595352268536,
|
| 45992 |
+
"grad_norm": 3.5302391052246094,
|
| 45993 |
+
"learning_rate": 6.930110292714765e-07,
|
| 45994 |
+
"loss": 0.0874,
|
| 45995 |
+
"step": 6050
|
| 45996 |
+
},
|
| 45997 |
+
{
|
| 45998 |
+
"epoch": 0.8928070822574695,
|
| 45999 |
+
"grad_norm": 1.5268058776855469,
|
| 46000 |
+
"learning_rate": 6.911281005536031e-07,
|
| 46001 |
+
"loss": 0.037,
|
| 46002 |
+
"step": 6051
|
| 46003 |
+
},
|
| 46004 |
+
{
|
| 46005 |
+
"epoch": 0.8929546292880856,
|
| 46006 |
+
"grad_norm": 2.8932971954345703,
|
| 46007 |
+
"learning_rate": 6.892476417365989e-07,
|
| 46008 |
+
"loss": 0.0532,
|
| 46009 |
+
"step": 6052
|
| 46010 |
+
},
|
| 46011 |
+
{
|
| 46012 |
+
"epoch": 0.8931021763187016,
|
| 46013 |
+
"grad_norm": 2.512969732284546,
|
| 46014 |
+
"learning_rate": 6.873696533193996e-07,
|
| 46015 |
+
"loss": 0.0757,
|
| 46016 |
+
"step": 6053
|
| 46017 |
+
},
|
| 46018 |
+
{
|
| 46019 |
+
"epoch": 0.8932497233493176,
|
| 46020 |
+
"grad_norm": 3.8057785034179688,
|
| 46021 |
+
"learning_rate": 6.85494135800292e-07,
|
| 46022 |
+
"loss": 0.036,
|
| 46023 |
+
"step": 6054
|
| 46024 |
+
},
|
| 46025 |
+
{
|
| 46026 |
+
"epoch": 0.8933972703799336,
|
| 46027 |
+
"grad_norm": 3.723708391189575,
|
| 46028 |
+
"learning_rate": 6.836210896769014e-07,
|
| 46029 |
+
"loss": 0.0765,
|
| 46030 |
+
"step": 6055
|
| 46031 |
+
},
|
| 46032 |
+
{
|
| 46033 |
+
"epoch": 0.8935448174105496,
|
| 46034 |
+
"grad_norm": 2.7616844177246094,
|
| 46035 |
+
"learning_rate": 6.817505154461956e-07,
|
| 46036 |
+
"loss": 0.0504,
|
| 46037 |
+
"step": 6056
|
| 46038 |
+
},
|
| 46039 |
+
{
|
| 46040 |
+
"epoch": 0.8936923644411656,
|
| 46041 |
+
"grad_norm": 2.264693260192871,
|
| 46042 |
+
"learning_rate": 6.798824136044913e-07,
|
| 46043 |
+
"loss": 0.0518,
|
| 46044 |
+
"step": 6057
|
| 46045 |
+
},
|
| 46046 |
+
{
|
| 46047 |
+
"epoch": 0.8938399114717817,
|
| 46048 |
+
"grad_norm": 4.492088317871094,
|
| 46049 |
+
"learning_rate": 6.780167846474506e-07,
|
| 46050 |
+
"loss": 0.1116,
|
| 46051 |
+
"step": 6058
|
| 46052 |
+
},
|
| 46053 |
+
{
|
| 46054 |
+
"epoch": 0.8939874585023977,
|
| 46055 |
+
"grad_norm": 5.52939510345459,
|
| 46056 |
+
"learning_rate": 6.761536290700721e-07,
|
| 46057 |
+
"loss": 0.0688,
|
| 46058 |
+
"step": 6059
|
| 46059 |
+
},
|
| 46060 |
+
{
|
| 46061 |
+
"epoch": 0.8941350055330136,
|
| 46062 |
+
"grad_norm": 2.8091180324554443,
|
| 46063 |
+
"learning_rate": 6.742929473667048e-07,
|
| 46064 |
+
"loss": 0.0819,
|
| 46065 |
+
"step": 6060
|
| 46066 |
+
},
|
| 46067 |
+
{
|
| 46068 |
+
"epoch": 0.8941350055330136,
|
| 46069 |
+
"eval_accuracy": 0.9782923299565847,
|
| 46070 |
+
"eval_f1": 0.9629629629629629,
|
| 46071 |
+
"eval_loss": 0.0556659959256649,
|
| 46072 |
+
"eval_precision": 0.9798994974874372,
|
| 46073 |
+
"eval_recall": 0.9466019417475728,
|
| 46074 |
+
"eval_runtime": 48.6299,
|
| 46075 |
+
"eval_samples_per_second": 5.984,
|
| 46076 |
+
"eval_steps_per_second": 0.206,
|
| 46077 |
+
"step": 6060
|
| 46078 |
+
},
|
| 46079 |
+
{
|
| 46080 |
+
"epoch": 0.8942825525636297,
|
| 46081 |
+
"grad_norm": 2.870699167251587,
|
| 46082 |
+
"learning_rate": 6.724347400310371e-07,
|
| 46083 |
+
"loss": 0.0833,
|
| 46084 |
+
"step": 6061
|
| 46085 |
+
},
|
| 46086 |
+
{
|
| 46087 |
+
"epoch": 0.8944300995942457,
|
| 46088 |
+
"grad_norm": 2.0958855152130127,
|
| 46089 |
+
"learning_rate": 6.705790075561047e-07,
|
| 46090 |
+
"loss": 0.0436,
|
| 46091 |
+
"step": 6062
|
| 46092 |
+
},
|
| 46093 |
+
{
|
| 46094 |
+
"epoch": 0.8945776466248617,
|
| 46095 |
+
"grad_norm": 3.7520968914031982,
|
| 46096 |
+
"learning_rate": 6.687257504342848e-07,
|
| 46097 |
+
"loss": 0.0666,
|
| 46098 |
+
"step": 6063
|
| 46099 |
+
},
|
| 46100 |
+
{
|
| 46101 |
+
"epoch": 0.8947251936554776,
|
| 46102 |
+
"grad_norm": 1.8063980340957642,
|
| 46103 |
+
"learning_rate": 6.668749691572951e-07,
|
| 46104 |
+
"loss": 0.0345,
|
| 46105 |
+
"step": 6064
|
| 46106 |
+
},
|
| 46107 |
+
{
|
| 46108 |
+
"epoch": 0.8948727406860937,
|
| 46109 |
+
"grad_norm": 4.453982830047607,
|
| 46110 |
+
"learning_rate": 6.650266642162029e-07,
|
| 46111 |
+
"loss": 0.1076,
|
| 46112 |
+
"step": 6065
|
| 46113 |
+
},
|
| 46114 |
+
{
|
| 46115 |
+
"epoch": 0.8950202877167097,
|
| 46116 |
+
"grad_norm": 2.0510618686676025,
|
| 46117 |
+
"learning_rate": 6.631808361014113e-07,
|
| 46118 |
+
"loss": 0.0227,
|
| 46119 |
+
"step": 6066
|
| 46120 |
+
},
|
| 46121 |
+
{
|
| 46122 |
+
"epoch": 0.8951678347473258,
|
| 46123 |
+
"grad_norm": 2.578852415084839,
|
| 46124 |
+
"learning_rate": 6.613374853026689e-07,
|
| 46125 |
+
"loss": 0.04,
|
| 46126 |
+
"step": 6067
|
| 46127 |
+
},
|
| 46128 |
+
{
|
| 46129 |
+
"epoch": 0.8953153817779417,
|
| 46130 |
+
"grad_norm": 2.0627481937408447,
|
| 46131 |
+
"learning_rate": 6.594966123090718e-07,
|
| 46132 |
+
"loss": 0.0538,
|
| 46133 |
+
"step": 6068
|
| 46134 |
+
},
|
| 46135 |
+
{
|
| 46136 |
+
"epoch": 0.8954629288085577,
|
| 46137 |
+
"grad_norm": 5.8762617111206055,
|
| 46138 |
+
"learning_rate": 6.576582176090518e-07,
|
| 46139 |
+
"loss": 0.0398,
|
| 46140 |
+
"step": 6069
|
| 46141 |
+
},
|
| 46142 |
+
{
|
| 46143 |
+
"epoch": 0.8956104758391737,
|
| 46144 |
+
"grad_norm": 2.373622179031372,
|
| 46145 |
+
"learning_rate": 6.558223016903842e-07,
|
| 46146 |
+
"loss": 0.0969,
|
| 46147 |
+
"step": 6070
|
| 46148 |
+
},
|
| 46149 |
+
{
|
| 46150 |
+
"epoch": 0.8957580228697898,
|
| 46151 |
+
"grad_norm": 1.838910698890686,
|
| 46152 |
+
"learning_rate": 6.539888650401916e-07,
|
| 46153 |
+
"loss": 0.0363,
|
| 46154 |
+
"step": 6071
|
| 46155 |
+
},
|
| 46156 |
+
{
|
| 46157 |
+
"epoch": 0.8959055699004057,
|
| 46158 |
+
"grad_norm": 10.581929206848145,
|
| 46159 |
+
"learning_rate": 6.521579081449325e-07,
|
| 46160 |
+
"loss": 0.0918,
|
| 46161 |
+
"step": 6072
|
| 46162 |
+
},
|
| 46163 |
+
{
|
| 46164 |
+
"epoch": 0.8960531169310217,
|
| 46165 |
+
"grad_norm": 3.3708910942077637,
|
| 46166 |
+
"learning_rate": 6.503294314904108e-07,
|
| 46167 |
+
"loss": 0.059,
|
| 46168 |
+
"step": 6073
|
| 46169 |
+
},
|
| 46170 |
+
{
|
| 46171 |
+
"epoch": 0.8962006639616378,
|
| 46172 |
+
"grad_norm": 2.5104010105133057,
|
| 46173 |
+
"learning_rate": 6.485034355617748e-07,
|
| 46174 |
+
"loss": 0.0224,
|
| 46175 |
+
"step": 6074
|
| 46176 |
+
},
|
| 46177 |
+
{
|
| 46178 |
+
"epoch": 0.8963482109922538,
|
| 46179 |
+
"grad_norm": 1.8458657264709473,
|
| 46180 |
+
"learning_rate": 6.466799208435081e-07,
|
| 46181 |
+
"loss": 0.0375,
|
| 46182 |
+
"step": 6075
|
| 46183 |
+
},
|
| 46184 |
+
{
|
| 46185 |
+
"epoch": 0.8964957580228697,
|
| 46186 |
+
"grad_norm": 4.009340763092041,
|
| 46187 |
+
"learning_rate": 6.448588878194406e-07,
|
| 46188 |
+
"loss": 0.0681,
|
| 46189 |
+
"step": 6076
|
| 46190 |
+
},
|
| 46191 |
+
{
|
| 46192 |
+
"epoch": 0.8966433050534858,
|
| 46193 |
+
"grad_norm": 1.2859127521514893,
|
| 46194 |
+
"learning_rate": 6.430403369727445e-07,
|
| 46195 |
+
"loss": 0.0283,
|
| 46196 |
+
"step": 6077
|
| 46197 |
+
},
|
| 46198 |
+
{
|
| 46199 |
+
"epoch": 0.8967908520841018,
|
| 46200 |
+
"grad_norm": 1.2199759483337402,
|
| 46201 |
+
"learning_rate": 6.412242687859294e-07,
|
| 46202 |
+
"loss": 0.0296,
|
| 46203 |
+
"step": 6078
|
| 46204 |
+
},
|
| 46205 |
+
{
|
| 46206 |
+
"epoch": 0.8969383991147178,
|
| 46207 |
+
"grad_norm": 2.5927979946136475,
|
| 46208 |
+
"learning_rate": 6.394106837408487e-07,
|
| 46209 |
+
"loss": 0.0468,
|
| 46210 |
+
"step": 6079
|
| 46211 |
+
},
|
| 46212 |
+
{
|
| 46213 |
+
"epoch": 0.8970859461453339,
|
| 46214 |
+
"grad_norm": 7.891746997833252,
|
| 46215 |
+
"learning_rate": 6.375995823186987e-07,
|
| 46216 |
+
"loss": 0.0912,
|
| 46217 |
+
"step": 6080
|
| 46218 |
+
},
|
| 46219 |
+
{
|
| 46220 |
+
"epoch": 0.8970859461453339,
|
| 46221 |
+
"eval_accuracy": 0.9782923299565847,
|
| 46222 |
+
"eval_f1": 0.9629629629629629,
|
| 46223 |
+
"eval_loss": 0.05581069737672806,
|
| 46224 |
+
"eval_precision": 0.9798994974874372,
|
| 46225 |
+
"eval_recall": 0.9466019417475728,
|
| 46226 |
+
"eval_runtime": 48.7869,
|
| 46227 |
+
"eval_samples_per_second": 5.965,
|
| 46228 |
+
"eval_steps_per_second": 0.205,
|
| 46229 |
+
"step": 6080
|
| 46230 |
+
},
|
| 46231 |
+
{
|
| 46232 |
+
"epoch": 0.8972334931759498,
|
| 46233 |
+
"grad_norm": 2.4385836124420166,
|
| 46234 |
+
"learning_rate": 6.357909650000094e-07,
|
| 46235 |
+
"loss": 0.0744,
|
| 46236 |
+
"step": 6081
|
| 46237 |
+
},
|
| 46238 |
+
{
|
| 46239 |
+
"epoch": 0.8973810402065658,
|
| 46240 |
+
"grad_norm": 1.913123607635498,
|
| 46241 |
+
"learning_rate": 6.339848322646625e-07,
|
| 46242 |
+
"loss": 0.0558,
|
| 46243 |
+
"step": 6082
|
| 46244 |
+
},
|
| 46245 |
+
{
|
| 46246 |
+
"epoch": 0.8975285872371819,
|
| 46247 |
+
"grad_norm": 2.230882167816162,
|
| 46248 |
+
"learning_rate": 6.321811845918735e-07,
|
| 46249 |
+
"loss": 0.0325,
|
| 46250 |
+
"step": 6083
|
| 46251 |
+
},
|
| 46252 |
+
{
|
| 46253 |
+
"epoch": 0.8976761342677979,
|
| 46254 |
+
"grad_norm": 1.8755987882614136,
|
| 46255 |
+
"learning_rate": 6.303800224601986e-07,
|
| 46256 |
+
"loss": 0.0615,
|
| 46257 |
+
"step": 6084
|
| 46258 |
+
},
|
| 46259 |
+
{
|
| 46260 |
+
"epoch": 0.8978236812984138,
|
| 46261 |
+
"grad_norm": 3.3818376064300537,
|
| 46262 |
+
"learning_rate": 6.285813463475366e-07,
|
| 46263 |
+
"loss": 0.0615,
|
| 46264 |
+
"step": 6085
|
| 46265 |
+
},
|
| 46266 |
+
{
|
| 46267 |
+
"epoch": 0.8979712283290299,
|
| 46268 |
+
"grad_norm": 1.8080580234527588,
|
| 46269 |
+
"learning_rate": 6.267851567311256e-07,
|
| 46270 |
+
"loss": 0.0506,
|
| 46271 |
+
"step": 6086
|
| 46272 |
+
},
|
| 46273 |
+
{
|
| 46274 |
+
"epoch": 0.8981187753596459,
|
| 46275 |
+
"grad_norm": 3.3100953102111816,
|
| 46276 |
+
"learning_rate": 6.249914540875445e-07,
|
| 46277 |
+
"loss": 0.0438,
|
| 46278 |
+
"step": 6087
|
| 46279 |
+
},
|
| 46280 |
+
{
|
| 46281 |
+
"epoch": 0.8982663223902619,
|
| 46282 |
+
"grad_norm": 2.4367289543151855,
|
| 46283 |
+
"learning_rate": 6.232002388927127e-07,
|
| 46284 |
+
"loss": 0.0651,
|
| 46285 |
+
"step": 6088
|
| 46286 |
+
},
|
| 46287 |
+
{
|
| 46288 |
+
"epoch": 0.8984138694208779,
|
| 46289 |
+
"grad_norm": 3.0320327281951904,
|
| 46290 |
+
"learning_rate": 6.214115116218877e-07,
|
| 46291 |
+
"loss": 0.0466,
|
| 46292 |
+
"step": 6089
|
| 46293 |
+
},
|
| 46294 |
+
{
|
| 46295 |
+
"epoch": 0.8985614164514939,
|
| 46296 |
+
"grad_norm": 1.3278833627700806,
|
| 46297 |
+
"learning_rate": 6.19625272749671e-07,
|
| 46298 |
+
"loss": 0.0228,
|
| 46299 |
+
"step": 6090
|
| 46300 |
+
},
|
| 46301 |
+
{
|
| 46302 |
+
"epoch": 0.8987089634821099,
|
| 46303 |
+
"grad_norm": 2.3165955543518066,
|
| 46304 |
+
"learning_rate": 6.178415227499979e-07,
|
| 46305 |
+
"loss": 0.0507,
|
| 46306 |
+
"step": 6091
|
| 46307 |
+
},
|
| 46308 |
+
{
|
| 46309 |
+
"epoch": 0.898856510512726,
|
| 46310 |
+
"grad_norm": 1.7083970308303833,
|
| 46311 |
+
"learning_rate": 6.160602620961487e-07,
|
| 46312 |
+
"loss": 0.043,
|
| 46313 |
+
"step": 6092
|
| 46314 |
+
},
|
| 46315 |
+
{
|
| 46316 |
+
"epoch": 0.8990040575433419,
|
| 46317 |
+
"grad_norm": 2.6195430755615234,
|
| 46318 |
+
"learning_rate": 6.142814912607409e-07,
|
| 46319 |
+
"loss": 0.0706,
|
| 46320 |
+
"step": 6093
|
| 46321 |
+
},
|
| 46322 |
+
{
|
| 46323 |
+
"epoch": 0.8991516045739579,
|
| 46324 |
+
"grad_norm": 2.1232919692993164,
|
| 46325 |
+
"learning_rate": 6.125052107157304e-07,
|
| 46326 |
+
"loss": 0.0561,
|
| 46327 |
+
"step": 6094
|
| 46328 |
+
},
|
| 46329 |
+
{
|
| 46330 |
+
"epoch": 0.899299151604574,
|
| 46331 |
+
"grad_norm": 3.1567232608795166,
|
| 46332 |
+
"learning_rate": 6.107314209324123e-07,
|
| 46333 |
+
"loss": 0.0495,
|
| 46334 |
+
"step": 6095
|
| 46335 |
+
},
|
| 46336 |
+
{
|
| 46337 |
+
"epoch": 0.89944669863519,
|
| 46338 |
+
"grad_norm": 1.2741518020629883,
|
| 46339 |
+
"learning_rate": 6.089601223814268e-07,
|
| 46340 |
+
"loss": 0.0388,
|
| 46341 |
+
"step": 6096
|
| 46342 |
+
},
|
| 46343 |
+
{
|
| 46344 |
+
"epoch": 0.8995942456658059,
|
| 46345 |
+
"grad_norm": 2.4637041091918945,
|
| 46346 |
+
"learning_rate": 6.071913155327447e-07,
|
| 46347 |
+
"loss": 0.0538,
|
| 46348 |
+
"step": 6097
|
| 46349 |
+
},
|
| 46350 |
+
{
|
| 46351 |
+
"epoch": 0.899741792696422,
|
| 46352 |
+
"grad_norm": 2.406578302383423,
|
| 46353 |
+
"learning_rate": 6.054250008556783e-07,
|
| 46354 |
+
"loss": 0.0796,
|
| 46355 |
+
"step": 6098
|
| 46356 |
+
},
|
| 46357 |
+
{
|
| 46358 |
+
"epoch": 0.899889339727038,
|
| 46359 |
+
"grad_norm": 3.432889223098755,
|
| 46360 |
+
"learning_rate": 6.036611788188829e-07,
|
| 46361 |
+
"loss": 0.0655,
|
| 46362 |
+
"step": 6099
|
| 46363 |
+
},
|
| 46364 |
+
{
|
| 46365 |
+
"epoch": 0.900036886757654,
|
| 46366 |
+
"grad_norm": 3.666919708251953,
|
| 46367 |
+
"learning_rate": 6.018998498903472e-07,
|
| 46368 |
+
"loss": 0.0307,
|
| 46369 |
+
"step": 6100
|
| 46370 |
+
},
|
| 46371 |
+
{
|
| 46372 |
+
"epoch": 0.900036886757654,
|
| 46373 |
+
"eval_accuracy": 0.9782923299565847,
|
| 46374 |
+
"eval_f1": 0.9629629629629629,
|
| 46375 |
+
"eval_loss": 0.05625491961836815,
|
| 46376 |
+
"eval_precision": 0.9798994974874372,
|
| 46377 |
+
"eval_recall": 0.9466019417475728,
|
| 46378 |
+
"eval_runtime": 49.337,
|
| 46379 |
+
"eval_samples_per_second": 5.898,
|
| 46380 |
+
"eval_steps_per_second": 0.203,
|
| 46381 |
+
"step": 6100
|
| 46382 |
}
|
| 46383 |
],
|
| 46384 |
"logging_steps": 1,
|
|
|
|
| 46398 |
"attributes": {}
|
| 46399 |
}
|
| 46400 |
},
|
| 46401 |
+
"total_flos": 1.8789295751031685e+18,
|
| 46402 |
"train_batch_size": 8,
|
| 46403 |
"trial_name": null,
|
| 46404 |
"trial_params": null
|