Training in progress, step 2100, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13934748
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:050e4db55e69664bf6d9c834522ec2206b36b64c8d2f6ed4d5d17b4cf9da2f4e
|
| 3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13999412
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:adb3a674abc7da7a23279462f8cae294d8ecdec98362fed586fc3bccef1a61d4
|
| 3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d1e641d2b2e349a4c213409e52cf62d25bc236ac15c9791b7bc804909f7f92c3
|
| 3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:866ec72c28b8ea1e8a4c76f5ed42b739d69875ec24137c268880795bd767ba9b
|
| 3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f5d80504b530d1236d869d6a0431889ce3c16ca369fab9ca79aef572e1e676f
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b0148021e09d7b7a64e41765bf2c33e45d25853d9e709eca7c135e74bee54b7
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cda280986df81c923c89a0a8a61df0a1484f3b11f668604be6beb240af22c140
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bb78205b0b38be64245705e3d63c368f26e81d439c05fe7f4f6ee459319648f
|
| 3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68bc9217b6e9ab217f22aba698dbeddd344df01c6c8d3bf496373786b4d6b46f
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72a20a9702c2689058ab5d5d2baeb8c7227e34d68571334f92805043bd9e18eb
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8533b05acf81e2a8c388c137bc99083b4a5fc4f3554fc80f7b0497d2e0eca05f
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4e0b82e92d540a47961438b15ece197574d010671ffe40e6c7ee07f5dac4307
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ac1b330f53ae14ab4a2bb829af8af4d5e4c909474cfca651cf822672c87529f
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -15219,6 +15219,766 @@
|
|
| 15219 |
"eval_samples_per_second": 5.375,
|
| 15220 |
"eval_steps_per_second": 0.179,
|
| 15221 |
"step": 2000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15222 |
}
|
| 15223 |
],
|
| 15224 |
"logging_steps": 1,
|
|
@@ -15238,7 +15998,7 @@
|
|
| 15238 |
"attributes": {}
|
| 15239 |
}
|
| 15240 |
},
|
| 15241 |
-
"total_flos": 5.
|
| 15242 |
"train_batch_size": 8,
|
| 15243 |
"trial_name": null,
|
| 15244 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.9954965631666272,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 2100,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 15219 |
"eval_samples_per_second": 5.375,
|
| 15220 |
"eval_steps_per_second": 0.179,
|
| 15221 |
"step": 2000
|
| 15222 |
+
},
|
| 15223 |
+
{
|
| 15224 |
+
"epoch": 0.9485660109030576,
|
| 15225 |
+
"grad_norm": 2.924222707748413,
|
| 15226 |
+
"learning_rate": 1.5935584725626062e-07,
|
| 15227 |
+
"loss": 0.0924,
|
| 15228 |
+
"step": 2001
|
| 15229 |
+
},
|
| 15230 |
+
{
|
| 15231 |
+
"epoch": 0.9490400568855178,
|
| 15232 |
+
"grad_norm": 5.106085300445557,
|
| 15233 |
+
"learning_rate": 1.5642615756586765e-07,
|
| 15234 |
+
"loss": 0.1919,
|
| 15235 |
+
"step": 2002
|
| 15236 |
+
},
|
| 15237 |
+
{
|
| 15238 |
+
"epoch": 0.9495141028679782,
|
| 15239 |
+
"grad_norm": 5.821203708648682,
|
| 15240 |
+
"learning_rate": 1.5352343657680234e-07,
|
| 15241 |
+
"loss": 0.1551,
|
| 15242 |
+
"step": 2003
|
| 15243 |
+
},
|
| 15244 |
+
{
|
| 15245 |
+
"epoch": 0.9499881488504385,
|
| 15246 |
+
"grad_norm": 4.752243518829346,
|
| 15247 |
+
"learning_rate": 1.506476922417266e-07,
|
| 15248 |
+
"loss": 0.16,
|
| 15249 |
+
"step": 2004
|
| 15250 |
+
},
|
| 15251 |
+
{
|
| 15252 |
+
"epoch": 0.9504621948328988,
|
| 15253 |
+
"grad_norm": 4.044118404388428,
|
| 15254 |
+
"learning_rate": 1.4779893243939358e-07,
|
| 15255 |
+
"loss": 0.1228,
|
| 15256 |
+
"step": 2005
|
| 15257 |
+
},
|
| 15258 |
+
{
|
| 15259 |
+
"epoch": 0.9509362408153591,
|
| 15260 |
+
"grad_norm": 5.809322834014893,
|
| 15261 |
+
"learning_rate": 1.4497716497462676e-07,
|
| 15262 |
+
"loss": 0.1309,
|
| 15263 |
+
"step": 2006
|
| 15264 |
+
},
|
| 15265 |
+
{
|
| 15266 |
+
"epoch": 0.9514102867978194,
|
| 15267 |
+
"grad_norm": 5.9313459396362305,
|
| 15268 |
+
"learning_rate": 1.4218239757829656e-07,
|
| 15269 |
+
"loss": 0.1126,
|
| 15270 |
+
"step": 2007
|
| 15271 |
+
},
|
| 15272 |
+
{
|
| 15273 |
+
"epoch": 0.9518843327802797,
|
| 15274 |
+
"grad_norm": 5.524699687957764,
|
| 15275 |
+
"learning_rate": 1.3941463790730248e-07,
|
| 15276 |
+
"loss": 0.0932,
|
| 15277 |
+
"step": 2008
|
| 15278 |
+
},
|
| 15279 |
+
{
|
| 15280 |
+
"epoch": 0.95235837876274,
|
| 15281 |
+
"grad_norm": 3.8316197395324707,
|
| 15282 |
+
"learning_rate": 1.3667389354454997e-07,
|
| 15283 |
+
"loss": 0.1288,
|
| 15284 |
+
"step": 2009
|
| 15285 |
+
},
|
| 15286 |
+
{
|
| 15287 |
+
"epoch": 0.9528324247452002,
|
| 15288 |
+
"grad_norm": 4.261562347412109,
|
| 15289 |
+
"learning_rate": 1.3396017199892808e-07,
|
| 15290 |
+
"loss": 0.0725,
|
| 15291 |
+
"step": 2010
|
| 15292 |
+
},
|
| 15293 |
+
{
|
| 15294 |
+
"epoch": 0.9533064707276606,
|
| 15295 |
+
"grad_norm": 3.045381546020508,
|
| 15296 |
+
"learning_rate": 1.312734807052929e-07,
|
| 15297 |
+
"loss": 0.1336,
|
| 15298 |
+
"step": 2011
|
| 15299 |
+
},
|
| 15300 |
+
{
|
| 15301 |
+
"epoch": 0.9537805167101209,
|
| 15302 |
+
"grad_norm": 4.582825183868408,
|
| 15303 |
+
"learning_rate": 1.2861382702444304e-07,
|
| 15304 |
+
"loss": 0.1122,
|
| 15305 |
+
"step": 2012
|
| 15306 |
+
},
|
| 15307 |
+
{
|
| 15308 |
+
"epoch": 0.9542545626925811,
|
| 15309 |
+
"grad_norm": 5.358804702758789,
|
| 15310 |
+
"learning_rate": 1.2598121824310305e-07,
|
| 15311 |
+
"loss": 0.2103,
|
| 15312 |
+
"step": 2013
|
| 15313 |
+
},
|
| 15314 |
+
{
|
| 15315 |
+
"epoch": 0.9547286086750415,
|
| 15316 |
+
"grad_norm": 8.944177627563477,
|
| 15317 |
+
"learning_rate": 1.2337566157390124e-07,
|
| 15318 |
+
"loss": 0.2294,
|
| 15319 |
+
"step": 2014
|
| 15320 |
+
},
|
| 15321 |
+
{
|
| 15322 |
+
"epoch": 0.9552026546575018,
|
| 15323 |
+
"grad_norm": 3.860495090484619,
|
| 15324 |
+
"learning_rate": 1.2079716415534958e-07,
|
| 15325 |
+
"loss": 0.1725,
|
| 15326 |
+
"step": 2015
|
| 15327 |
+
},
|
| 15328 |
+
{
|
| 15329 |
+
"epoch": 0.9556767006399621,
|
| 15330 |
+
"grad_norm": 5.890530586242676,
|
| 15331 |
+
"learning_rate": 1.1824573305182829e-07,
|
| 15332 |
+
"loss": 0.1347,
|
| 15333 |
+
"step": 2016
|
| 15334 |
+
},
|
| 15335 |
+
{
|
| 15336 |
+
"epoch": 0.9561507466224224,
|
| 15337 |
+
"grad_norm": 4.890679359436035,
|
| 15338 |
+
"learning_rate": 1.1572137525356019e-07,
|
| 15339 |
+
"loss": 0.1632,
|
| 15340 |
+
"step": 2017
|
| 15341 |
+
},
|
| 15342 |
+
{
|
| 15343 |
+
"epoch": 0.9566247926048826,
|
| 15344 |
+
"grad_norm": 3.409152030944824,
|
| 15345 |
+
"learning_rate": 1.1322409767659526e-07,
|
| 15346 |
+
"loss": 0.1673,
|
| 15347 |
+
"step": 2018
|
| 15348 |
+
},
|
| 15349 |
+
{
|
| 15350 |
+
"epoch": 0.957098838587343,
|
| 15351 |
+
"grad_norm": 2.9978771209716797,
|
| 15352 |
+
"learning_rate": 1.1075390716279167e-07,
|
| 15353 |
+
"loss": 0.0933,
|
| 15354 |
+
"step": 2019
|
| 15355 |
+
},
|
| 15356 |
+
{
|
| 15357 |
+
"epoch": 0.9575728845698033,
|
| 15358 |
+
"grad_norm": 4.279489994049072,
|
| 15359 |
+
"learning_rate": 1.0831081047979585e-07,
|
| 15360 |
+
"loss": 0.1072,
|
| 15361 |
+
"step": 2020
|
| 15362 |
+
},
|
| 15363 |
+
{
|
| 15364 |
+
"epoch": 0.9575728845698033,
|
| 15365 |
+
"eval_accuracy": 0.9935587761674718,
|
| 15366 |
+
"eval_f1": 0.9272727272727272,
|
| 15367 |
+
"eval_loss": 0.012736320495605469,
|
| 15368 |
+
"eval_precision": 0.8793103448275862,
|
| 15369 |
+
"eval_recall": 0.9807692307692307,
|
| 15370 |
+
"eval_runtime": 50.4208,
|
| 15371 |
+
"eval_samples_per_second": 5.375,
|
| 15372 |
+
"eval_steps_per_second": 0.178,
|
| 15373 |
+
"step": 2020
|
| 15374 |
+
},
|
| 15375 |
+
{
|
| 15376 |
+
"epoch": 0.9580469305522635,
|
| 15377 |
+
"grad_norm": 3.377288579940796,
|
| 15378 |
+
"learning_rate": 1.0589481432102588e-07,
|
| 15379 |
+
"loss": 0.1007,
|
| 15380 |
+
"step": 2021
|
| 15381 |
+
},
|
| 15382 |
+
{
|
| 15383 |
+
"epoch": 0.9585209765347239,
|
| 15384 |
+
"grad_norm": 4.943248271942139,
|
| 15385 |
+
"learning_rate": 1.0350592530564919e-07,
|
| 15386 |
+
"loss": 0.1345,
|
| 15387 |
+
"step": 2022
|
| 15388 |
+
},
|
| 15389 |
+
{
|
| 15390 |
+
"epoch": 0.9589950225171842,
|
| 15391 |
+
"grad_norm": 3.178915500640869,
|
| 15392 |
+
"learning_rate": 1.0114414997856814e-07,
|
| 15393 |
+
"loss": 0.1501,
|
| 15394 |
+
"step": 2023
|
| 15395 |
+
},
|
| 15396 |
+
{
|
| 15397 |
+
"epoch": 0.9594690684996444,
|
| 15398 |
+
"grad_norm": 2.851790428161621,
|
| 15399 |
+
"learning_rate": 9.880949481040347e-08,
|
| 15400 |
+
"loss": 0.1128,
|
| 15401 |
+
"step": 2024
|
| 15402 |
+
},
|
| 15403 |
+
{
|
| 15404 |
+
"epoch": 0.9599431144821048,
|
| 15405 |
+
"grad_norm": 7.474143981933594,
|
| 15406 |
+
"learning_rate": 9.650196619747088e-08,
|
| 15407 |
+
"loss": 0.2338,
|
| 15408 |
+
"step": 2025
|
| 15409 |
+
},
|
| 15410 |
+
{
|
| 15411 |
+
"epoch": 0.960417160464565,
|
| 15412 |
+
"grad_norm": 4.426879405975342,
|
| 15413 |
+
"learning_rate": 9.422157046176772e-08,
|
| 15414 |
+
"loss": 0.1695,
|
| 15415 |
+
"step": 2026
|
| 15416 |
+
},
|
| 15417 |
+
{
|
| 15418 |
+
"epoch": 0.9608912064470254,
|
| 15419 |
+
"grad_norm": 4.276393890380859,
|
| 15420 |
+
"learning_rate": 9.19683138509564e-08,
|
| 15421 |
+
"loss": 0.1695,
|
| 15422 |
+
"step": 2027
|
| 15423 |
+
},
|
| 15424 |
+
{
|
| 15425 |
+
"epoch": 0.9613652524294857,
|
| 15426 |
+
"grad_norm": 4.4484357833862305,
|
| 15427 |
+
"learning_rate": 8.974220253834209e-08,
|
| 15428 |
+
"loss": 0.1489,
|
| 15429 |
+
"step": 2028
|
| 15430 |
+
},
|
| 15431 |
+
{
|
| 15432 |
+
"epoch": 0.9618392984119459,
|
| 15433 |
+
"grad_norm": 6.785750389099121,
|
| 15434 |
+
"learning_rate": 8.754324262286284e-08,
|
| 15435 |
+
"loss": 0.1951,
|
| 15436 |
+
"step": 2029
|
| 15437 |
+
},
|
| 15438 |
+
{
|
| 15439 |
+
"epoch": 0.9623133443944063,
|
| 15440 |
+
"grad_norm": 5.472995281219482,
|
| 15441 |
+
"learning_rate": 8.537144012906728e-08,
|
| 15442 |
+
"loss": 0.1067,
|
| 15443 |
+
"step": 2030
|
| 15444 |
+
},
|
| 15445 |
+
{
|
| 15446 |
+
"epoch": 0.9627873903768666,
|
| 15447 |
+
"grad_norm": 7.181637287139893,
|
| 15448 |
+
"learning_rate": 8.322680100710023e-08,
|
| 15449 |
+
"loss": 0.161,
|
| 15450 |
+
"step": 2031
|
| 15451 |
+
},
|
| 15452 |
+
{
|
| 15453 |
+
"epoch": 0.9632614363593268,
|
| 15454 |
+
"grad_norm": 2.844578504562378,
|
| 15455 |
+
"learning_rate": 8.110933113268604e-08,
|
| 15456 |
+
"loss": 0.1443,
|
| 15457 |
+
"step": 2032
|
| 15458 |
+
},
|
| 15459 |
+
{
|
| 15460 |
+
"epoch": 0.9637354823417872,
|
| 15461 |
+
"grad_norm": 4.869422435760498,
|
| 15462 |
+
"learning_rate": 7.901903630711416e-08,
|
| 15463 |
+
"loss": 0.0947,
|
| 15464 |
+
"step": 2033
|
| 15465 |
+
},
|
| 15466 |
+
{
|
| 15467 |
+
"epoch": 0.9642095283242474,
|
| 15468 |
+
"grad_norm": 8.479473114013672,
|
| 15469 |
+
"learning_rate": 7.695592225722137e-08,
|
| 15470 |
+
"loss": 0.1779,
|
| 15471 |
+
"step": 2034
|
| 15472 |
+
},
|
| 15473 |
+
{
|
| 15474 |
+
"epoch": 0.9646835743067077,
|
| 15475 |
+
"grad_norm": 6.677021503448486,
|
| 15476 |
+
"learning_rate": 7.491999463537403e-08,
|
| 15477 |
+
"loss": 0.2122,
|
| 15478 |
+
"step": 2035
|
| 15479 |
+
},
|
| 15480 |
+
{
|
| 15481 |
+
"epoch": 0.9651576202891681,
|
| 15482 |
+
"grad_norm": 6.709725379943848,
|
| 15483 |
+
"learning_rate": 7.291125901946027e-08,
|
| 15484 |
+
"loss": 0.2262,
|
| 15485 |
+
"step": 2036
|
| 15486 |
+
},
|
| 15487 |
+
{
|
| 15488 |
+
"epoch": 0.9656316662716283,
|
| 15489 |
+
"grad_norm": 9.639774322509766,
|
| 15490 |
+
"learning_rate": 7.092972091286454e-08,
|
| 15491 |
+
"loss": 0.1651,
|
| 15492 |
+
"step": 2037
|
| 15493 |
+
},
|
| 15494 |
+
{
|
| 15495 |
+
"epoch": 0.9661057122540887,
|
| 15496 |
+
"grad_norm": 4.166116714477539,
|
| 15497 |
+
"learning_rate": 6.897538574445972e-08,
|
| 15498 |
+
"loss": 0.0947,
|
| 15499 |
+
"step": 2038
|
| 15500 |
+
},
|
| 15501 |
+
{
|
| 15502 |
+
"epoch": 0.966579758236549,
|
| 15503 |
+
"grad_norm": 5.3391876220703125,
|
| 15504 |
+
"learning_rate": 6.704825886858946e-08,
|
| 15505 |
+
"loss": 0.1904,
|
| 15506 |
+
"step": 2039
|
| 15507 |
+
},
|
| 15508 |
+
{
|
| 15509 |
+
"epoch": 0.9670538042190092,
|
| 15510 |
+
"grad_norm": 4.7893595695495605,
|
| 15511 |
+
"learning_rate": 6.5148345565057e-08,
|
| 15512 |
+
"loss": 0.2165,
|
| 15513 |
+
"step": 2040
|
| 15514 |
+
},
|
| 15515 |
+
{
|
| 15516 |
+
"epoch": 0.9670538042190092,
|
| 15517 |
+
"eval_accuracy": 0.9935587761674718,
|
| 15518 |
+
"eval_f1": 0.9272727272727272,
|
| 15519 |
+
"eval_loss": 0.012882479466497898,
|
| 15520 |
+
"eval_precision": 0.8793103448275862,
|
| 15521 |
+
"eval_recall": 0.9807692307692307,
|
| 15522 |
+
"eval_runtime": 50.0769,
|
| 15523 |
+
"eval_samples_per_second": 5.412,
|
| 15524 |
+
"eval_steps_per_second": 0.18,
|
| 15525 |
+
"step": 2040
|
| 15526 |
+
},
|
| 15527 |
+
{
|
| 15528 |
+
"epoch": 0.9675278502014696,
|
| 15529 |
+
"grad_norm": 4.419187545776367,
|
| 15530 |
+
"learning_rate": 6.327565103910193e-08,
|
| 15531 |
+
"loss": 0.168,
|
| 15532 |
+
"step": 2041
|
| 15533 |
+
},
|
| 15534 |
+
{
|
| 15535 |
+
"epoch": 0.9680018961839298,
|
| 15536 |
+
"grad_norm": 10.345693588256836,
|
| 15537 |
+
"learning_rate": 6.143018042139903e-08,
|
| 15538 |
+
"loss": 0.1948,
|
| 15539 |
+
"step": 2042
|
| 15540 |
+
},
|
| 15541 |
+
{
|
| 15542 |
+
"epoch": 0.9684759421663901,
|
| 15543 |
+
"grad_norm": 8.49881649017334,
|
| 15544 |
+
"learning_rate": 5.96119387680294e-08,
|
| 15545 |
+
"loss": 0.1262,
|
| 15546 |
+
"step": 2043
|
| 15547 |
+
},
|
| 15548 |
+
{
|
| 15549 |
+
"epoch": 0.9689499881488505,
|
| 15550 |
+
"grad_norm": 3.0430965423583984,
|
| 15551 |
+
"learning_rate": 5.782093106048159e-08,
|
| 15552 |
+
"loss": 0.1379,
|
| 15553 |
+
"step": 2044
|
| 15554 |
+
},
|
| 15555 |
+
{
|
| 15556 |
+
"epoch": 0.9694240341313107,
|
| 15557 |
+
"grad_norm": 3.5971128940582275,
|
| 15558 |
+
"learning_rate": 5.605716220562385e-08,
|
| 15559 |
+
"loss": 0.1537,
|
| 15560 |
+
"step": 2045
|
| 15561 |
+
},
|
| 15562 |
+
{
|
| 15563 |
+
"epoch": 0.969898080113771,
|
| 15564 |
+
"grad_norm": 2.2294721603393555,
|
| 15565 |
+
"learning_rate": 5.4320637035704114e-08,
|
| 15566 |
+
"loss": 0.0722,
|
| 15567 |
+
"step": 2046
|
| 15568 |
+
},
|
| 15569 |
+
{
|
| 15570 |
+
"epoch": 0.9703721260962314,
|
| 15571 |
+
"grad_norm": 6.108776569366455,
|
| 15572 |
+
"learning_rate": 5.2611360308323364e-08,
|
| 15573 |
+
"loss": 0.1432,
|
| 15574 |
+
"step": 2047
|
| 15575 |
+
},
|
| 15576 |
+
{
|
| 15577 |
+
"epoch": 0.9708461720786916,
|
| 15578 |
+
"grad_norm": 4.834316730499268,
|
| 15579 |
+
"learning_rate": 5.092933670643452e-08,
|
| 15580 |
+
"loss": 0.1153,
|
| 15581 |
+
"step": 2048
|
| 15582 |
+
},
|
| 15583 |
+
{
|
| 15584 |
+
"epoch": 0.971320218061152,
|
| 15585 |
+
"grad_norm": 3.5349068641662598,
|
| 15586 |
+
"learning_rate": 4.9274570838322436e-08,
|
| 15587 |
+
"loss": 0.09,
|
| 15588 |
+
"step": 2049
|
| 15589 |
+
},
|
| 15590 |
+
{
|
| 15591 |
+
"epoch": 0.9717942640436122,
|
| 15592 |
+
"grad_norm": 5.70138692855835,
|
| 15593 |
+
"learning_rate": 4.764706723759172e-08,
|
| 15594 |
+
"loss": 0.1562,
|
| 15595 |
+
"step": 2050
|
| 15596 |
+
},
|
| 15597 |
+
{
|
| 15598 |
+
"epoch": 0.9722683100260725,
|
| 15599 |
+
"grad_norm": 7.175850868225098,
|
| 15600 |
+
"learning_rate": 4.604683036316004e-08,
|
| 15601 |
+
"loss": 0.3258,
|
| 15602 |
+
"step": 2051
|
| 15603 |
+
},
|
| 15604 |
+
{
|
| 15605 |
+
"epoch": 0.9727423560085329,
|
| 15606 |
+
"grad_norm": 4.018371105194092,
|
| 15607 |
+
"learning_rate": 4.4473864599235975e-08,
|
| 15608 |
+
"loss": 0.2188,
|
| 15609 |
+
"step": 2052
|
| 15610 |
+
},
|
| 15611 |
+
{
|
| 15612 |
+
"epoch": 0.9732164019909931,
|
| 15613 |
+
"grad_norm": 5.978484153747559,
|
| 15614 |
+
"learning_rate": 4.29281742553167e-08,
|
| 15615 |
+
"loss": 0.1379,
|
| 15616 |
+
"step": 2053
|
| 15617 |
+
},
|
| 15618 |
+
{
|
| 15619 |
+
"epoch": 0.9736904479734534,
|
| 15620 |
+
"grad_norm": 3.8246636390686035,
|
| 15621 |
+
"learning_rate": 4.1409763566172544e-08,
|
| 15622 |
+
"loss": 0.1241,
|
| 15623 |
+
"step": 2054
|
| 15624 |
+
},
|
| 15625 |
+
{
|
| 15626 |
+
"epoch": 0.9741644939559138,
|
| 15627 |
+
"grad_norm": 3.782214641571045,
|
| 15628 |
+
"learning_rate": 3.991863669183138e-08,
|
| 15629 |
+
"loss": 0.1725,
|
| 15630 |
+
"step": 2055
|
| 15631 |
+
},
|
| 15632 |
+
{
|
| 15633 |
+
"epoch": 0.974638539938374,
|
| 15634 |
+
"grad_norm": 5.974038124084473,
|
| 15635 |
+
"learning_rate": 3.845479771757532e-08,
|
| 15636 |
+
"loss": 0.0927,
|
| 15637 |
+
"step": 2056
|
| 15638 |
+
},
|
| 15639 |
+
{
|
| 15640 |
+
"epoch": 0.9751125859208343,
|
| 15641 |
+
"grad_norm": 4.958864688873291,
|
| 15642 |
+
"learning_rate": 3.701825065392184e-08,
|
| 15643 |
+
"loss": 0.1666,
|
| 15644 |
+
"step": 2057
|
| 15645 |
+
},
|
| 15646 |
+
{
|
| 15647 |
+
"epoch": 0.9755866319032946,
|
| 15648 |
+
"grad_norm": 7.515510559082031,
|
| 15649 |
+
"learning_rate": 3.560899943661822e-08,
|
| 15650 |
+
"loss": 0.1924,
|
| 15651 |
+
"step": 2058
|
| 15652 |
+
},
|
| 15653 |
+
{
|
| 15654 |
+
"epoch": 0.9760606778857549,
|
| 15655 |
+
"grad_norm": 6.320629596710205,
|
| 15656 |
+
"learning_rate": 3.422704792662601e-08,
|
| 15657 |
+
"loss": 0.1618,
|
| 15658 |
+
"step": 2059
|
| 15659 |
+
},
|
| 15660 |
+
{
|
| 15661 |
+
"epoch": 0.9765347238682153,
|
| 15662 |
+
"grad_norm": 3.3863184452056885,
|
| 15663 |
+
"learning_rate": 3.2872399910115484e-08,
|
| 15664 |
+
"loss": 0.1417,
|
| 15665 |
+
"step": 2060
|
| 15666 |
+
},
|
| 15667 |
+
{
|
| 15668 |
+
"epoch": 0.9765347238682153,
|
| 15669 |
+
"eval_accuracy": 0.9935587761674718,
|
| 15670 |
+
"eval_f1": 0.9272727272727272,
|
| 15671 |
+
"eval_loss": 0.012739640660583973,
|
| 15672 |
+
"eval_precision": 0.8793103448275862,
|
| 15673 |
+
"eval_recall": 0.9807692307692307,
|
| 15674 |
+
"eval_runtime": 49.7841,
|
| 15675 |
+
"eval_samples_per_second": 5.444,
|
| 15676 |
+
"eval_steps_per_second": 0.181,
|
| 15677 |
+
"step": 2060
|
| 15678 |
+
},
|
| 15679 |
+
{
|
| 15680 |
+
"epoch": 0.9770087698506755,
|
| 15681 |
+
"grad_norm": 4.818965911865234,
|
| 15682 |
+
"learning_rate": 3.154505909845229e-08,
|
| 15683 |
+
"loss": 0.1236,
|
| 15684 |
+
"step": 2061
|
| 15685 |
+
},
|
| 15686 |
+
{
|
| 15687 |
+
"epoch": 0.9774828158331358,
|
| 15688 |
+
"grad_norm": 6.437606334686279,
|
| 15689 |
+
"learning_rate": 3.024502912818528e-08,
|
| 15690 |
+
"loss": 0.2105,
|
| 15691 |
+
"step": 2062
|
| 15692 |
+
},
|
| 15693 |
+
{
|
| 15694 |
+
"epoch": 0.9779568618155962,
|
| 15695 |
+
"grad_norm": 4.611502647399902,
|
| 15696 |
+
"learning_rate": 2.897231356104424e-08,
|
| 15697 |
+
"loss": 0.1779,
|
| 15698 |
+
"step": 2063
|
| 15699 |
+
},
|
| 15700 |
+
{
|
| 15701 |
+
"epoch": 0.9784309077980564,
|
| 15702 |
+
"grad_norm": 5.7043843269348145,
|
| 15703 |
+
"learning_rate": 2.7726915883919958e-08,
|
| 15704 |
+
"loss": 0.1738,
|
| 15705 |
+
"step": 2064
|
| 15706 |
+
},
|
| 15707 |
+
{
|
| 15708 |
+
"epoch": 0.9789049537805167,
|
| 15709 |
+
"grad_norm": 3.3945627212524414,
|
| 15710 |
+
"learning_rate": 2.6508839508861963e-08,
|
| 15711 |
+
"loss": 0.1066,
|
| 15712 |
+
"step": 2065
|
| 15713 |
+
},
|
| 15714 |
+
{
|
| 15715 |
+
"epoch": 0.979378999762977,
|
| 15716 |
+
"grad_norm": 3.7300400733947754,
|
| 15717 |
+
"learning_rate": 2.5318087773066325e-08,
|
| 15718 |
+
"loss": 0.1186,
|
| 15719 |
+
"step": 2066
|
| 15720 |
+
},
|
| 15721 |
+
{
|
| 15722 |
+
"epoch": 0.9798530457454373,
|
| 15723 |
+
"grad_norm": 5.509089469909668,
|
| 15724 |
+
"learning_rate": 2.4154663938867894e-08,
|
| 15725 |
+
"loss": 0.1847,
|
| 15726 |
+
"step": 2067
|
| 15727 |
+
},
|
| 15728 |
+
{
|
| 15729 |
+
"epoch": 0.9803270917278976,
|
| 15730 |
+
"grad_norm": 3.7570600509643555,
|
| 15731 |
+
"learning_rate": 2.3018571193729188e-08,
|
| 15732 |
+
"loss": 0.1604,
|
| 15733 |
+
"step": 2068
|
| 15734 |
+
},
|
| 15735 |
+
{
|
| 15736 |
+
"epoch": 0.9808011377103579,
|
| 15737 |
+
"grad_norm": 3.3540408611297607,
|
| 15738 |
+
"learning_rate": 2.190981265023373e-08,
|
| 15739 |
+
"loss": 0.0865,
|
| 15740 |
+
"step": 2069
|
| 15741 |
+
},
|
| 15742 |
+
{
|
| 15743 |
+
"epoch": 0.9812751836928182,
|
| 15744 |
+
"grad_norm": 3.976696252822876,
|
| 15745 |
+
"learning_rate": 2.082839134607828e-08,
|
| 15746 |
+
"loss": 0.1681,
|
| 15747 |
+
"step": 2070
|
| 15748 |
+
},
|
| 15749 |
+
{
|
| 15750 |
+
"epoch": 0.9817492296752786,
|
| 15751 |
+
"grad_norm": 5.1151838302612305,
|
| 15752 |
+
"learning_rate": 1.9774310244059512e-08,
|
| 15753 |
+
"loss": 0.1638,
|
| 15754 |
+
"step": 2071
|
| 15755 |
+
},
|
| 15756 |
+
{
|
| 15757 |
+
"epoch": 0.9822232756577388,
|
| 15758 |
+
"grad_norm": 6.2366414070129395,
|
| 15759 |
+
"learning_rate": 1.874757223207291e-08,
|
| 15760 |
+
"loss": 0.1142,
|
| 15761 |
+
"step": 2072
|
| 15762 |
+
},
|
| 15763 |
+
{
|
| 15764 |
+
"epoch": 0.9826973216401991,
|
| 15765 |
+
"grad_norm": 3.962942361831665,
|
| 15766 |
+
"learning_rate": 1.7748180123100535e-08,
|
| 15767 |
+
"loss": 0.136,
|
| 15768 |
+
"step": 2073
|
| 15769 |
+
},
|
| 15770 |
+
{
|
| 15771 |
+
"epoch": 0.9831713676226594,
|
| 15772 |
+
"grad_norm": 9.428374290466309,
|
| 15773 |
+
"learning_rate": 1.677613665520106e-08,
|
| 15774 |
+
"loss": 0.1083,
|
| 15775 |
+
"step": 2074
|
| 15776 |
+
},
|
| 15777 |
+
{
|
| 15778 |
+
"epoch": 0.9836454136051197,
|
| 15779 |
+
"grad_norm": 6.933211326599121,
|
| 15780 |
+
"learning_rate": 1.583144449150975e-08,
|
| 15781 |
+
"loss": 0.1716,
|
| 15782 |
+
"step": 2075
|
| 15783 |
+
},
|
| 15784 |
+
{
|
| 15785 |
+
"epoch": 0.98411945958758,
|
| 15786 |
+
"grad_norm": 5.4883575439453125,
|
| 15787 |
+
"learning_rate": 1.4914106220225156e-08,
|
| 15788 |
+
"loss": 0.1931,
|
| 15789 |
+
"step": 2076
|
| 15790 |
+
},
|
| 15791 |
+
{
|
| 15792 |
+
"epoch": 0.9845935055700403,
|
| 15793 |
+
"grad_norm": 3.674689531326294,
|
| 15794 |
+
"learning_rate": 1.402412435460132e-08,
|
| 15795 |
+
"loss": 0.167,
|
| 15796 |
+
"step": 2077
|
| 15797 |
+
},
|
| 15798 |
+
{
|
| 15799 |
+
"epoch": 0.9850675515525006,
|
| 15800 |
+
"grad_norm": 3.3293275833129883,
|
| 15801 |
+
"learning_rate": 1.3161501332947802e-08,
|
| 15802 |
+
"loss": 0.1224,
|
| 15803 |
+
"step": 2078
|
| 15804 |
+
},
|
| 15805 |
+
{
|
| 15806 |
+
"epoch": 0.9855415975349608,
|
| 15807 |
+
"grad_norm": 5.863772392272949,
|
| 15808 |
+
"learning_rate": 1.2326239518614114e-08,
|
| 15809 |
+
"loss": 0.1418,
|
| 15810 |
+
"step": 2079
|
| 15811 |
+
},
|
| 15812 |
+
{
|
| 15813 |
+
"epoch": 0.9860156435174212,
|
| 15814 |
+
"grad_norm": 4.600866317749023,
|
| 15815 |
+
"learning_rate": 1.1518341199989735e-08,
|
| 15816 |
+
"loss": 0.101,
|
| 15817 |
+
"step": 2080
|
| 15818 |
+
},
|
| 15819 |
+
{
|
| 15820 |
+
"epoch": 0.9860156435174212,
|
| 15821 |
+
"eval_accuracy": 0.9935587761674718,
|
| 15822 |
+
"eval_f1": 0.9272727272727272,
|
| 15823 |
+
"eval_loss": 0.012615163810551167,
|
| 15824 |
+
"eval_precision": 0.8793103448275862,
|
| 15825 |
+
"eval_recall": 0.9807692307692307,
|
| 15826 |
+
"eval_runtime": 50.0081,
|
| 15827 |
+
"eval_samples_per_second": 5.419,
|
| 15828 |
+
"eval_steps_per_second": 0.18,
|
| 15829 |
+
"step": 2080
|
| 15830 |
+
},
|
| 15831 |
+
{
|
| 15832 |
+
"epoch": 0.9864896894998815,
|
| 15833 |
+
"grad_norm": 6.645082950592041,
|
| 15834 |
+
"learning_rate": 1.0737808590495225e-08,
|
| 15835 |
+
"loss": 0.1798,
|
| 15836 |
+
"step": 2081
|
| 15837 |
+
},
|
| 15838 |
+
{
|
| 15839 |
+
"epoch": 0.9869637354823418,
|
| 15840 |
+
"grad_norm": 3.026750087738037,
|
| 15841 |
+
"learning_rate": 9.984643828576669e-09,
|
| 15842 |
+
"loss": 0.1094,
|
| 15843 |
+
"step": 2082
|
| 15844 |
+
},
|
| 15845 |
+
{
|
| 15846 |
+
"epoch": 0.9874377814648021,
|
| 15847 |
+
"grad_norm": 4.144604206085205,
|
| 15848 |
+
"learning_rate": 9.25884897770013e-09,
|
| 15849 |
+
"loss": 0.1389,
|
| 15850 |
+
"step": 2083
|
| 15851 |
+
},
|
| 15852 |
+
{
|
| 15853 |
+
"epoch": 0.9879118274472624,
|
| 15854 |
+
"grad_norm": 5.202576637268066,
|
| 15855 |
+
"learning_rate": 8.560426026343881e-09,
|
| 15856 |
+
"loss": 0.2081,
|
| 15857 |
+
"step": 2084
|
| 15858 |
+
},
|
| 15859 |
+
{
|
| 15860 |
+
"epoch": 0.9883858734297227,
|
| 15861 |
+
"grad_norm": 3.02374005317688,
|
| 15862 |
+
"learning_rate": 7.889376887997291e-09,
|
| 15863 |
+
"loss": 0.1092,
|
| 15864 |
+
"step": 2085
|
| 15865 |
+
},
|
| 15866 |
+
{
|
| 15867 |
+
"epoch": 0.988859919412183,
|
| 15868 |
+
"grad_norm": 2.9011049270629883,
|
| 15869 |
+
"learning_rate": 7.245703401149717e-09,
|
| 15870 |
+
"loss": 0.1357,
|
| 15871 |
+
"step": 2086
|
| 15872 |
+
},
|
| 15873 |
+
{
|
| 15874 |
+
"epoch": 0.9893339653946432,
|
| 15875 |
+
"grad_norm": 6.7899250984191895,
|
| 15876 |
+
"learning_rate": 6.629407329292736e-09,
|
| 15877 |
+
"loss": 0.1352,
|
| 15878 |
+
"step": 2087
|
| 15879 |
+
},
|
| 15880 |
+
{
|
| 15881 |
+
"epoch": 0.9898080113771036,
|
| 15882 |
+
"grad_norm": 3.2638795375823975,
|
| 15883 |
+
"learning_rate": 6.0404903609068146e-09,
|
| 15884 |
+
"loss": 0.1101,
|
| 15885 |
+
"step": 2088
|
| 15886 |
+
},
|
| 15887 |
+
{
|
| 15888 |
+
"epoch": 0.9902820573595639,
|
| 15889 |
+
"grad_norm": 5.045032501220703,
|
| 15890 |
+
"learning_rate": 5.47895410946575e-09,
|
| 15891 |
+
"loss": 0.1703,
|
| 15892 |
+
"step": 2089
|
| 15893 |
+
},
|
| 15894 |
+
{
|
| 15895 |
+
"epoch": 0.9907561033420241,
|
| 15896 |
+
"grad_norm": 3.866666078567505,
|
| 15897 |
+
"learning_rate": 4.9448001134233536e-09,
|
| 15898 |
+
"loss": 0.1551,
|
| 15899 |
+
"step": 2090
|
| 15900 |
+
},
|
| 15901 |
+
{
|
| 15902 |
+
"epoch": 0.9912301493244845,
|
| 15903 |
+
"grad_norm": 3.6094112396240234,
|
| 15904 |
+
"learning_rate": 4.438029836216773e-09,
|
| 15905 |
+
"loss": 0.1368,
|
| 15906 |
+
"step": 2091
|
| 15907 |
+
},
|
| 15908 |
+
{
|
| 15909 |
+
"epoch": 0.9917041953069448,
|
| 15910 |
+
"grad_norm": 4.1954779624938965,
|
| 15911 |
+
"learning_rate": 3.958644666257616e-09,
|
| 15912 |
+
"loss": 0.1503,
|
| 15913 |
+
"step": 2092
|
| 15914 |
+
},
|
| 15915 |
+
{
|
| 15916 |
+
"epoch": 0.9921782412894051,
|
| 15917 |
+
"grad_norm": 3.4606990814208984,
|
| 15918 |
+
"learning_rate": 3.5066459169297294e-09,
|
| 15919 |
+
"loss": 0.1193,
|
| 15920 |
+
"step": 2093
|
| 15921 |
+
},
|
| 15922 |
+
{
|
| 15923 |
+
"epoch": 0.9926522872718654,
|
| 15924 |
+
"grad_norm": 3.768021821975708,
|
| 15925 |
+
"learning_rate": 3.082034826586977e-09,
|
| 15926 |
+
"loss": 0.171,
|
| 15927 |
+
"step": 2094
|
| 15928 |
+
},
|
| 15929 |
+
{
|
| 15930 |
+
"epoch": 0.9931263332543256,
|
| 15931 |
+
"grad_norm": 3.0747859477996826,
|
| 15932 |
+
"learning_rate": 2.684812558547689e-09,
|
| 15933 |
+
"loss": 0.0801,
|
| 15934 |
+
"step": 2095
|
| 15935 |
+
},
|
| 15936 |
+
{
|
| 15937 |
+
"epoch": 0.993600379236786,
|
| 15938 |
+
"grad_norm": 6.338437080383301,
|
| 15939 |
+
"learning_rate": 2.3149802010913323e-09,
|
| 15940 |
+
"loss": 0.1644,
|
| 15941 |
+
"step": 2096
|
| 15942 |
+
},
|
| 15943 |
+
{
|
| 15944 |
+
"epoch": 0.9940744252192463,
|
| 15945 |
+
"grad_norm": 3.9809048175811768,
|
| 15946 |
+
"learning_rate": 1.9725387674585095e-09,
|
| 15947 |
+
"loss": 0.1284,
|
| 15948 |
+
"step": 2097
|
| 15949 |
+
},
|
| 15950 |
+
{
|
| 15951 |
+
"epoch": 0.9945484712017065,
|
| 15952 |
+
"grad_norm": 4.982059955596924,
|
| 15953 |
+
"learning_rate": 1.6574891958442973e-09,
|
| 15954 |
+
"loss": 0.1434,
|
| 15955 |
+
"step": 2098
|
| 15956 |
+
},
|
| 15957 |
+
{
|
| 15958 |
+
"epoch": 0.9950225171841669,
|
| 15959 |
+
"grad_norm": 2.924060583114624,
|
| 15960 |
+
"learning_rate": 1.3698323493993582e-09,
|
| 15961 |
+
"loss": 0.0911,
|
| 15962 |
+
"step": 2099
|
| 15963 |
+
},
|
| 15964 |
+
{
|
| 15965 |
+
"epoch": 0.9954965631666272,
|
| 15966 |
+
"grad_norm": 7.759647846221924,
|
| 15967 |
+
"learning_rate": 1.1095690162243878e-09,
|
| 15968 |
+
"loss": 0.2136,
|
| 15969 |
+
"step": 2100
|
| 15970 |
+
},
|
| 15971 |
+
{
|
| 15972 |
+
"epoch": 0.9954965631666272,
|
| 15973 |
+
"eval_accuracy": 0.9935587761674718,
|
| 15974 |
+
"eval_f1": 0.9272727272727272,
|
| 15975 |
+
"eval_loss": 0.012555374763906002,
|
| 15976 |
+
"eval_precision": 0.8793103448275862,
|
| 15977 |
+
"eval_recall": 0.9807692307692307,
|
| 15978 |
+
"eval_runtime": 50.017,
|
| 15979 |
+
"eval_samples_per_second": 5.418,
|
| 15980 |
+
"eval_steps_per_second": 0.18,
|
| 15981 |
+
"step": 2100
|
| 15982 |
}
|
| 15983 |
],
|
| 15984 |
"logging_steps": 1,
|
|
|
|
| 15998 |
"attributes": {}
|
| 15999 |
}
|
| 16000 |
},
|
| 16001 |
+
"total_flos": 5.6093907946255155e+17,
|
| 16002 |
"train_batch_size": 8,
|
| 16003 |
"trial_name": null,
|
| 16004 |
"trial_params": null
|