Training in progress, step 1000, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13934748
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a352f0d84009b1817ea378a4704c01130220431cda057a719176edb53b9ce38
|
| 3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13999412
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:192c82f34e86d685c6f351fd58c1000ddea9a13d640195ac79c49fbf42423aa5
|
| 3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b90ca6c9a0d45f633e326ad429b79dcb8a229254c394c0026c58947de8b6ccb1
|
| 3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1ddcd3b678ecc28638f1325c2c32db98cad1876b80914907eec102e20d65888
|
| 3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:441eb9b06b4fc0f3fa0a9291de25b8426d0d9f412df64f69773da2db1b4860b2
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01659b87d6d23358ab75fa4077af9feedf08b369b1c157aa83e98851b9c0d1ee
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71450373e32f8a9a1b7bd7c09bbf7665cd2aab9935d9141b9e0d70c0fce7c3de
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:173eff09d590e65fe2dd1179e23f7fb059beaf649179bf2d537bde02e80545b0
|
| 3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6afe62f64f980792c5f93908f1252e0efd7d9d6dd9a401096016c0cf0f6e9df7
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c96a88229c7cf8988c09092a9afef0bd222230400623a17d132e957aa024720
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50d2280d0785bc9b8dd3a1397de7a4d5f6e608d8e08010244249962de0f0c423
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05e89f8a1132e0f0def133732be826c04d18fb1ddc8e499809e4f481802df182
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ead26a1aba46fa0b3384e323e0349ee0e9c3d6b20dad4ce8e9c9bf15675155cc
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -6859,6 +6859,766 @@
|
|
| 6859 |
"eval_samples_per_second": 5.818,
|
| 6860 |
"eval_steps_per_second": 0.19,
|
| 6861 |
"step": 900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6862 |
}
|
| 6863 |
],
|
| 6864 |
"logging_steps": 1,
|
|
@@ -6878,7 +7638,7 @@
|
|
| 6878 |
"attributes": {}
|
| 6879 |
}
|
| 6880 |
},
|
| 6881 |
-
"total_flos":
|
| 6882 |
"train_batch_size": 8,
|
| 6883 |
"trial_name": null,
|
| 6884 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.9276437847866419,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 1000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 6859 |
"eval_samples_per_second": 5.818,
|
| 6860 |
"eval_steps_per_second": 0.19,
|
| 6861 |
"step": 900
|
| 6862 |
+
},
|
| 6863 |
+
{
|
| 6864 |
+
"epoch": 0.8358070500927643,
|
| 6865 |
+
"grad_norm": 3.852740526199341,
|
| 6866 |
+
"learning_rate": 1.5986252796969482e-06,
|
| 6867 |
+
"loss": 0.1165,
|
| 6868 |
+
"step": 901
|
| 6869 |
+
},
|
| 6870 |
+
{
|
| 6871 |
+
"epoch": 0.8367346938775511,
|
| 6872 |
+
"grad_norm": 5.131833076477051,
|
| 6873 |
+
"learning_rate": 1.5811032226467304e-06,
|
| 6874 |
+
"loss": 0.198,
|
| 6875 |
+
"step": 902
|
| 6876 |
+
},
|
| 6877 |
+
{
|
| 6878 |
+
"epoch": 0.8376623376623377,
|
| 6879 |
+
"grad_norm": 4.975651741027832,
|
| 6880 |
+
"learning_rate": 1.5636694758399563e-06,
|
| 6881 |
+
"loss": 0.1891,
|
| 6882 |
+
"step": 903
|
| 6883 |
+
},
|
| 6884 |
+
{
|
| 6885 |
+
"epoch": 0.8385899814471243,
|
| 6886 |
+
"grad_norm": 3.24419903755188,
|
| 6887 |
+
"learning_rate": 1.5463242221483742e-06,
|
| 6888 |
+
"loss": 0.0935,
|
| 6889 |
+
"step": 904
|
| 6890 |
+
},
|
| 6891 |
+
{
|
| 6892 |
+
"epoch": 0.839517625231911,
|
| 6893 |
+
"grad_norm": 3.5641651153564453,
|
| 6894 |
+
"learning_rate": 1.5290676435154949e-06,
|
| 6895 |
+
"loss": 0.1533,
|
| 6896 |
+
"step": 905
|
| 6897 |
+
},
|
| 6898 |
+
{
|
| 6899 |
+
"epoch": 0.8404452690166976,
|
| 6900 |
+
"grad_norm": 3.872134208679199,
|
| 6901 |
+
"learning_rate": 1.511899920954656e-06,
|
| 6902 |
+
"loss": 0.1545,
|
| 6903 |
+
"step": 906
|
| 6904 |
+
},
|
| 6905 |
+
{
|
| 6906 |
+
"epoch": 0.8413729128014842,
|
| 6907 |
+
"grad_norm": 6.075543403625488,
|
| 6908 |
+
"learning_rate": 1.4948212345471492e-06,
|
| 6909 |
+
"loss": 0.2032,
|
| 6910 |
+
"step": 907
|
| 6911 |
+
},
|
| 6912 |
+
{
|
| 6913 |
+
"epoch": 0.8423005565862709,
|
| 6914 |
+
"grad_norm": 2.9056954383850098,
|
| 6915 |
+
"learning_rate": 1.4778317634403082e-06,
|
| 6916 |
+
"loss": 0.0986,
|
| 6917 |
+
"step": 908
|
| 6918 |
+
},
|
| 6919 |
+
{
|
| 6920 |
+
"epoch": 0.8432282003710575,
|
| 6921 |
+
"grad_norm": 5.516162872314453,
|
| 6922 |
+
"learning_rate": 1.460931685845649e-06,
|
| 6923 |
+
"loss": 0.1868,
|
| 6924 |
+
"step": 909
|
| 6925 |
+
},
|
| 6926 |
+
{
|
| 6927 |
+
"epoch": 0.8441558441558441,
|
| 6928 |
+
"grad_norm": 2.8610849380493164,
|
| 6929 |
+
"learning_rate": 1.4441211790369892e-06,
|
| 6930 |
+
"loss": 0.0923,
|
| 6931 |
+
"step": 910
|
| 6932 |
+
},
|
| 6933 |
+
{
|
| 6934 |
+
"epoch": 0.8450834879406308,
|
| 6935 |
+
"grad_norm": 3.700863838195801,
|
| 6936 |
+
"learning_rate": 1.427400419348588e-06,
|
| 6937 |
+
"loss": 0.1291,
|
| 6938 |
+
"step": 911
|
| 6939 |
+
},
|
| 6940 |
+
{
|
| 6941 |
+
"epoch": 0.8460111317254174,
|
| 6942 |
+
"grad_norm": 4.772455215454102,
|
| 6943 |
+
"learning_rate": 1.4107695821733026e-06,
|
| 6944 |
+
"loss": 0.1352,
|
| 6945 |
+
"step": 912
|
| 6946 |
+
},
|
| 6947 |
+
{
|
| 6948 |
+
"epoch": 0.8469387755102041,
|
| 6949 |
+
"grad_norm": 3.5742745399475098,
|
| 6950 |
+
"learning_rate": 1.3942288419607476e-06,
|
| 6951 |
+
"loss": 0.1824,
|
| 6952 |
+
"step": 913
|
| 6953 |
+
},
|
| 6954 |
+
{
|
| 6955 |
+
"epoch": 0.8478664192949907,
|
| 6956 |
+
"grad_norm": 8.259415626525879,
|
| 6957 |
+
"learning_rate": 1.3777783722154603e-06,
|
| 6958 |
+
"loss": 0.2448,
|
| 6959 |
+
"step": 914
|
| 6960 |
+
},
|
| 6961 |
+
{
|
| 6962 |
+
"epoch": 0.8487940630797773,
|
| 6963 |
+
"grad_norm": 3.900238513946533,
|
| 6964 |
+
"learning_rate": 1.3614183454950824e-06,
|
| 6965 |
+
"loss": 0.1273,
|
| 6966 |
+
"step": 915
|
| 6967 |
+
},
|
| 6968 |
+
{
|
| 6969 |
+
"epoch": 0.849721706864564,
|
| 6970 |
+
"grad_norm": 2.9773433208465576,
|
| 6971 |
+
"learning_rate": 1.3451489334085555e-06,
|
| 6972 |
+
"loss": 0.1522,
|
| 6973 |
+
"step": 916
|
| 6974 |
+
},
|
| 6975 |
+
{
|
| 6976 |
+
"epoch": 0.8506493506493507,
|
| 6977 |
+
"grad_norm": 3.071232318878174,
|
| 6978 |
+
"learning_rate": 1.3289703066143112e-06,
|
| 6979 |
+
"loss": 0.1256,
|
| 6980 |
+
"step": 917
|
| 6981 |
+
},
|
| 6982 |
+
{
|
| 6983 |
+
"epoch": 0.8515769944341373,
|
| 6984 |
+
"grad_norm": 3.8165667057037354,
|
| 6985 |
+
"learning_rate": 1.3128826348184886e-06,
|
| 6986 |
+
"loss": 0.1111,
|
| 6987 |
+
"step": 918
|
| 6988 |
+
},
|
| 6989 |
+
{
|
| 6990 |
+
"epoch": 0.852504638218924,
|
| 6991 |
+
"grad_norm": 3.7821688652038574,
|
| 6992 |
+
"learning_rate": 1.296886086773157e-06,
|
| 6993 |
+
"loss": 0.2091,
|
| 6994 |
+
"step": 919
|
| 6995 |
+
},
|
| 6996 |
+
{
|
| 6997 |
+
"epoch": 0.8534322820037106,
|
| 6998 |
+
"grad_norm": 4.833895206451416,
|
| 6999 |
+
"learning_rate": 1.2809808302745298e-06,
|
| 7000 |
+
"loss": 0.1762,
|
| 7001 |
+
"step": 920
|
| 7002 |
+
},
|
| 7003 |
+
{
|
| 7004 |
+
"epoch": 0.8534322820037106,
|
| 7005 |
+
"eval_accuracy": 0.8603104212860311,
|
| 7006 |
+
"eval_f1": 0.704225352112676,
|
| 7007 |
+
"eval_loss": 0.30113720893859863,
|
| 7008 |
+
"eval_precision": 0.8670520231213873,
|
| 7009 |
+
"eval_recall": 0.5928853754940712,
|
| 7010 |
+
"eval_runtime": 47.313,
|
| 7011 |
+
"eval_samples_per_second": 5.833,
|
| 7012 |
+
"eval_steps_per_second": 0.19,
|
| 7013 |
+
"step": 920
|
| 7014 |
+
},
|
| 7015 |
+
{
|
| 7016 |
+
"epoch": 0.8543599257884972,
|
| 7017 |
+
"grad_norm": 3.3207972049713135,
|
| 7018 |
+
"learning_rate": 1.2651670321612264e-06,
|
| 7019 |
+
"loss": 0.1367,
|
| 7020 |
+
"step": 921
|
| 7021 |
+
},
|
| 7022 |
+
{
|
| 7023 |
+
"epoch": 0.8552875695732839,
|
| 7024 |
+
"grad_norm": 3.202796697616577,
|
| 7025 |
+
"learning_rate": 1.249444858312502e-06,
|
| 7026 |
+
"loss": 0.1379,
|
| 7027 |
+
"step": 922
|
| 7028 |
+
},
|
| 7029 |
+
{
|
| 7030 |
+
"epoch": 0.8562152133580705,
|
| 7031 |
+
"grad_norm": 6.188356876373291,
|
| 7032 |
+
"learning_rate": 1.233814473646524e-06,
|
| 7033 |
+
"loss": 0.2627,
|
| 7034 |
+
"step": 923
|
| 7035 |
+
},
|
| 7036 |
+
{
|
| 7037 |
+
"epoch": 0.8571428571428571,
|
| 7038 |
+
"grad_norm": 3.4624321460723877,
|
| 7039 |
+
"learning_rate": 1.218276042118629e-06,
|
| 7040 |
+
"loss": 0.1318,
|
| 7041 |
+
"step": 924
|
| 7042 |
+
},
|
| 7043 |
+
{
|
| 7044 |
+
"epoch": 0.8580705009276438,
|
| 7045 |
+
"grad_norm": 3.288809061050415,
|
| 7046 |
+
"learning_rate": 1.202829726719611e-06,
|
| 7047 |
+
"loss": 0.1188,
|
| 7048 |
+
"step": 925
|
| 7049 |
+
},
|
| 7050 |
+
{
|
| 7051 |
+
"epoch": 0.8589981447124304,
|
| 7052 |
+
"grad_norm": 2.691675901412964,
|
| 7053 |
+
"learning_rate": 1.1874756894740137e-06,
|
| 7054 |
+
"loss": 0.1252,
|
| 7055 |
+
"step": 926
|
| 7056 |
+
},
|
| 7057 |
+
{
|
| 7058 |
+
"epoch": 0.859925788497217,
|
| 7059 |
+
"grad_norm": 3.750600576400757,
|
| 7060 |
+
"learning_rate": 1.1722140914384162e-06,
|
| 7061 |
+
"loss": 0.1644,
|
| 7062 |
+
"step": 927
|
| 7063 |
+
},
|
| 7064 |
+
{
|
| 7065 |
+
"epoch": 0.8608534322820037,
|
| 7066 |
+
"grad_norm": 3.1353397369384766,
|
| 7067 |
+
"learning_rate": 1.1570450926997657e-06,
|
| 7068 |
+
"loss": 0.1461,
|
| 7069 |
+
"step": 928
|
| 7070 |
+
},
|
| 7071 |
+
{
|
| 7072 |
+
"epoch": 0.8617810760667903,
|
| 7073 |
+
"grad_norm": 5.295469760894775,
|
| 7074 |
+
"learning_rate": 1.1419688523736761e-06,
|
| 7075 |
+
"loss": 0.1967,
|
| 7076 |
+
"step": 929
|
| 7077 |
+
},
|
| 7078 |
+
{
|
| 7079 |
+
"epoch": 0.862708719851577,
|
| 7080 |
+
"grad_norm": 3.461599349975586,
|
| 7081 |
+
"learning_rate": 1.1269855286027798e-06,
|
| 7082 |
+
"loss": 0.1426,
|
| 7083 |
+
"step": 930
|
| 7084 |
+
},
|
| 7085 |
+
{
|
| 7086 |
+
"epoch": 0.8636363636363636,
|
| 7087 |
+
"grad_norm": 6.9660420417785645,
|
| 7088 |
+
"learning_rate": 1.1120952785550477e-06,
|
| 7089 |
+
"loss": 0.2015,
|
| 7090 |
+
"step": 931
|
| 7091 |
+
},
|
| 7092 |
+
{
|
| 7093 |
+
"epoch": 0.8645640074211502,
|
| 7094 |
+
"grad_norm": 2.989213705062866,
|
| 7095 |
+
"learning_rate": 1.0972982584221592e-06,
|
| 7096 |
+
"loss": 0.1204,
|
| 7097 |
+
"step": 932
|
| 7098 |
+
},
|
| 7099 |
+
{
|
| 7100 |
+
"epoch": 0.865491651205937,
|
| 7101 |
+
"grad_norm": 4.492414474487305,
|
| 7102 |
+
"learning_rate": 1.0825946234178575e-06,
|
| 7103 |
+
"loss": 0.1579,
|
| 7104 |
+
"step": 933
|
| 7105 |
+
},
|
| 7106 |
+
{
|
| 7107 |
+
"epoch": 0.8664192949907236,
|
| 7108 |
+
"grad_norm": 4.693439960479736,
|
| 7109 |
+
"learning_rate": 1.067984527776309e-06,
|
| 7110 |
+
"loss": 0.1959,
|
| 7111 |
+
"step": 934
|
| 7112 |
+
},
|
| 7113 |
+
{
|
| 7114 |
+
"epoch": 0.8673469387755102,
|
| 7115 |
+
"grad_norm": 5.462426662445068,
|
| 7116 |
+
"learning_rate": 1.0534681247505107e-06,
|
| 7117 |
+
"loss": 0.1435,
|
| 7118 |
+
"step": 935
|
| 7119 |
+
},
|
| 7120 |
+
{
|
| 7121 |
+
"epoch": 0.8682745825602969,
|
| 7122 |
+
"grad_norm": 2.594604730606079,
|
| 7123 |
+
"learning_rate": 1.0390455666106547e-06,
|
| 7124 |
+
"loss": 0.115,
|
| 7125 |
+
"step": 936
|
| 7126 |
+
},
|
| 7127 |
+
{
|
| 7128 |
+
"epoch": 0.8692022263450835,
|
| 7129 |
+
"grad_norm": 5.900606155395508,
|
| 7130 |
+
"learning_rate": 1.024717004642557e-06,
|
| 7131 |
+
"loss": 0.1749,
|
| 7132 |
+
"step": 937
|
| 7133 |
+
},
|
| 7134 |
+
{
|
| 7135 |
+
"epoch": 0.8701298701298701,
|
| 7136 |
+
"grad_norm": 5.774359226226807,
|
| 7137 |
+
"learning_rate": 1.010482589146048e-06,
|
| 7138 |
+
"loss": 0.1802,
|
| 7139 |
+
"step": 938
|
| 7140 |
+
},
|
| 7141 |
+
{
|
| 7142 |
+
"epoch": 0.8710575139146568,
|
| 7143 |
+
"grad_norm": 4.002913951873779,
|
| 7144 |
+
"learning_rate": 9.963424694334122e-07,
|
| 7145 |
+
"loss": 0.1277,
|
| 7146 |
+
"step": 939
|
| 7147 |
+
},
|
| 7148 |
+
{
|
| 7149 |
+
"epoch": 0.8719851576994434,
|
| 7150 |
+
"grad_norm": 3.6173672676086426,
|
| 7151 |
+
"learning_rate": 9.822967938278172e-07,
|
| 7152 |
+
"loss": 0.1561,
|
| 7153 |
+
"step": 940
|
| 7154 |
+
},
|
| 7155 |
+
{
|
| 7156 |
+
"epoch": 0.8719851576994434,
|
| 7157 |
+
"eval_accuracy": 0.8603104212860311,
|
| 7158 |
+
"eval_f1": 0.704225352112676,
|
| 7159 |
+
"eval_loss": 0.29984721541404724,
|
| 7160 |
+
"eval_precision": 0.8670520231213873,
|
| 7161 |
+
"eval_recall": 0.5928853754940712,
|
| 7162 |
+
"eval_runtime": 48.0345,
|
| 7163 |
+
"eval_samples_per_second": 5.746,
|
| 7164 |
+
"eval_steps_per_second": 0.187,
|
| 7165 |
+
"step": 940
|
| 7166 |
+
},
|
| 7167 |
+
{
|
| 7168 |
+
"epoch": 0.87291280148423,
|
| 7169 |
+
"grad_norm": 5.298496723175049,
|
| 7170 |
+
"learning_rate": 9.683457096617487e-07,
|
| 7171 |
+
"loss": 0.1343,
|
| 7172 |
+
"step": 941
|
| 7173 |
+
},
|
| 7174 |
+
{
|
| 7175 |
+
"epoch": 0.8738404452690167,
|
| 7176 |
+
"grad_norm": 4.087591648101807,
|
| 7177 |
+
"learning_rate": 9.544893632754816e-07,
|
| 7178 |
+
"loss": 0.1342,
|
| 7179 |
+
"step": 942
|
| 7180 |
+
},
|
| 7181 |
+
{
|
| 7182 |
+
"epoch": 0.8747680890538033,
|
| 7183 |
+
"grad_norm": 3.6953861713409424,
|
| 7184 |
+
"learning_rate": 9.407279000155311e-07,
|
| 7185 |
+
"loss": 0.1125,
|
| 7186 |
+
"step": 943
|
| 7187 |
+
},
|
| 7188 |
+
{
|
| 7189 |
+
"epoch": 0.87569573283859,
|
| 7190 |
+
"grad_norm": 5.693349838256836,
|
| 7191 |
+
"learning_rate": 9.270614642331377e-07,
|
| 7192 |
+
"loss": 0.2285,
|
| 7193 |
+
"step": 944
|
| 7194 |
+
},
|
| 7195 |
+
{
|
| 7196 |
+
"epoch": 0.8766233766233766,
|
| 7197 |
+
"grad_norm": 4.321276664733887,
|
| 7198 |
+
"learning_rate": 9.134901992827427e-07,
|
| 7199 |
+
"loss": 0.2169,
|
| 7200 |
+
"step": 945
|
| 7201 |
+
},
|
| 7202 |
+
{
|
| 7203 |
+
"epoch": 0.8775510204081632,
|
| 7204 |
+
"grad_norm": 5.951560020446777,
|
| 7205 |
+
"learning_rate": 9.000142475204965e-07,
|
| 7206 |
+
"loss": 0.2039,
|
| 7207 |
+
"step": 946
|
| 7208 |
+
},
|
| 7209 |
+
{
|
| 7210 |
+
"epoch": 0.87847866419295,
|
| 7211 |
+
"grad_norm": 5.382765293121338,
|
| 7212 |
+
"learning_rate": 8.866337503027523e-07,
|
| 7213 |
+
"loss": 0.1347,
|
| 7214 |
+
"step": 947
|
| 7215 |
+
},
|
| 7216 |
+
{
|
| 7217 |
+
"epoch": 0.8794063079777366,
|
| 7218 |
+
"grad_norm": 4.566171646118164,
|
| 7219 |
+
"learning_rate": 8.733488479845997e-07,
|
| 7220 |
+
"loss": 0.1929,
|
| 7221 |
+
"step": 948
|
| 7222 |
+
},
|
| 7223 |
+
{
|
| 7224 |
+
"epoch": 0.8803339517625232,
|
| 7225 |
+
"grad_norm": 4.413459300994873,
|
| 7226 |
+
"learning_rate": 8.60159679918372e-07,
|
| 7227 |
+
"loss": 0.1463,
|
| 7228 |
+
"step": 949
|
| 7229 |
+
},
|
| 7230 |
+
{
|
| 7231 |
+
"epoch": 0.8812615955473099,
|
| 7232 |
+
"grad_norm": 3.8674092292785645,
|
| 7233 |
+
"learning_rate": 8.470663844522053e-07,
|
| 7234 |
+
"loss": 0.1523,
|
| 7235 |
+
"step": 950
|
| 7236 |
+
},
|
| 7237 |
+
{
|
| 7238 |
+
"epoch": 0.8821892393320965,
|
| 7239 |
+
"grad_norm": 3.844576597213745,
|
| 7240 |
+
"learning_rate": 8.340690989285727e-07,
|
| 7241 |
+
"loss": 0.1248,
|
| 7242 |
+
"step": 951
|
| 7243 |
+
},
|
| 7244 |
+
{
|
| 7245 |
+
"epoch": 0.8831168831168831,
|
| 7246 |
+
"grad_norm": 4.541808605194092,
|
| 7247 |
+
"learning_rate": 8.211679596828481e-07,
|
| 7248 |
+
"loss": 0.1571,
|
| 7249 |
+
"step": 952
|
| 7250 |
+
},
|
| 7251 |
+
{
|
| 7252 |
+
"epoch": 0.8840445269016698,
|
| 7253 |
+
"grad_norm": 3.0702145099639893,
|
| 7254 |
+
"learning_rate": 8.083631020418792e-07,
|
| 7255 |
+
"loss": 0.157,
|
| 7256 |
+
"step": 953
|
| 7257 |
+
},
|
| 7258 |
+
{
|
| 7259 |
+
"epoch": 0.8849721706864564,
|
| 7260 |
+
"grad_norm": 3.5125439167022705,
|
| 7261 |
+
"learning_rate": 7.956546603225601e-07,
|
| 7262 |
+
"loss": 0.1011,
|
| 7263 |
+
"step": 954
|
| 7264 |
+
},
|
| 7265 |
+
{
|
| 7266 |
+
"epoch": 0.885899814471243,
|
| 7267 |
+
"grad_norm": 4.256104469299316,
|
| 7268 |
+
"learning_rate": 7.830427678304353e-07,
|
| 7269 |
+
"loss": 0.1411,
|
| 7270 |
+
"step": 955
|
| 7271 |
+
},
|
| 7272 |
+
{
|
| 7273 |
+
"epoch": 0.8868274582560297,
|
| 7274 |
+
"grad_norm": 4.931686878204346,
|
| 7275 |
+
"learning_rate": 7.705275568582848e-07,
|
| 7276 |
+
"loss": 0.1953,
|
| 7277 |
+
"step": 956
|
| 7278 |
+
},
|
| 7279 |
+
{
|
| 7280 |
+
"epoch": 0.8877551020408163,
|
| 7281 |
+
"grad_norm": 5.233354091644287,
|
| 7282 |
+
"learning_rate": 7.581091586847522e-07,
|
| 7283 |
+
"loss": 0.2095,
|
| 7284 |
+
"step": 957
|
| 7285 |
+
},
|
| 7286 |
+
{
|
| 7287 |
+
"epoch": 0.8886827458256029,
|
| 7288 |
+
"grad_norm": 6.383068084716797,
|
| 7289 |
+
"learning_rate": 7.457877035729588e-07,
|
| 7290 |
+
"loss": 0.2274,
|
| 7291 |
+
"step": 958
|
| 7292 |
+
},
|
| 7293 |
+
{
|
| 7294 |
+
"epoch": 0.8896103896103896,
|
| 7295 |
+
"grad_norm": 2.8475682735443115,
|
| 7296 |
+
"learning_rate": 7.335633207691362e-07,
|
| 7297 |
+
"loss": 0.1336,
|
| 7298 |
+
"step": 959
|
| 7299 |
+
},
|
| 7300 |
+
{
|
| 7301 |
+
"epoch": 0.8905380333951762,
|
| 7302 |
+
"grad_norm": 3.393915891647339,
|
| 7303 |
+
"learning_rate": 7.21436138501278e-07,
|
| 7304 |
+
"loss": 0.1633,
|
| 7305 |
+
"step": 960
|
| 7306 |
+
},
|
| 7307 |
+
{
|
| 7308 |
+
"epoch": 0.8905380333951762,
|
| 7309 |
+
"eval_accuracy": 0.8569844789356984,
|
| 7310 |
+
"eval_f1": 0.6935866983372921,
|
| 7311 |
+
"eval_loss": 0.3064272701740265,
|
| 7312 |
+
"eval_precision": 0.8690476190476191,
|
| 7313 |
+
"eval_recall": 0.5770750988142292,
|
| 7314 |
+
"eval_runtime": 48.2701,
|
| 7315 |
+
"eval_samples_per_second": 5.718,
|
| 7316 |
+
"eval_steps_per_second": 0.186,
|
| 7317 |
+
"step": 960
|
| 7318 |
+
},
|
| 7319 |
+
{
|
| 7320 |
+
"epoch": 0.891465677179963,
|
| 7321 |
+
"grad_norm": 4.68550968170166,
|
| 7322 |
+
"learning_rate": 7.094062839777838e-07,
|
| 7323 |
+
"loss": 0.1854,
|
| 7324 |
+
"step": 961
|
| 7325 |
+
},
|
| 7326 |
+
{
|
| 7327 |
+
"epoch": 0.8923933209647495,
|
| 7328 |
+
"grad_norm": 5.072958946228027,
|
| 7329 |
+
"learning_rate": 6.974738833861383e-07,
|
| 7330 |
+
"loss": 0.1762,
|
| 7331 |
+
"step": 962
|
| 7332 |
+
},
|
| 7333 |
+
{
|
| 7334 |
+
"epoch": 0.8933209647495362,
|
| 7335 |
+
"grad_norm": 4.519327640533447,
|
| 7336 |
+
"learning_rate": 6.856390618915775e-07,
|
| 7337 |
+
"loss": 0.182,
|
| 7338 |
+
"step": 963
|
| 7339 |
+
},
|
| 7340 |
+
{
|
| 7341 |
+
"epoch": 0.8942486085343229,
|
| 7342 |
+
"grad_norm": 5.558988094329834,
|
| 7343 |
+
"learning_rate": 6.739019436357774e-07,
|
| 7344 |
+
"loss": 0.1665,
|
| 7345 |
+
"step": 964
|
| 7346 |
+
},
|
| 7347 |
+
{
|
| 7348 |
+
"epoch": 0.8951762523191095,
|
| 7349 |
+
"grad_norm": 2.263278007507324,
|
| 7350 |
+
"learning_rate": 6.622626517355557e-07,
|
| 7351 |
+
"loss": 0.1112,
|
| 7352 |
+
"step": 965
|
| 7353 |
+
},
|
| 7354 |
+
{
|
| 7355 |
+
"epoch": 0.8961038961038961,
|
| 7356 |
+
"grad_norm": 5.888603687286377,
|
| 7357 |
+
"learning_rate": 6.507213082815745e-07,
|
| 7358 |
+
"loss": 0.1455,
|
| 7359 |
+
"step": 966
|
| 7360 |
+
},
|
| 7361 |
+
{
|
| 7362 |
+
"epoch": 0.8970315398886828,
|
| 7363 |
+
"grad_norm": 5.091086387634277,
|
| 7364 |
+
"learning_rate": 6.392780343370686e-07,
|
| 7365 |
+
"loss": 0.1812,
|
| 7366 |
+
"step": 967
|
| 7367 |
+
},
|
| 7368 |
+
{
|
| 7369 |
+
"epoch": 0.8979591836734694,
|
| 7370 |
+
"grad_norm": 6.290548324584961,
|
| 7371 |
+
"learning_rate": 6.279329499365649e-07,
|
| 7372 |
+
"loss": 0.1527,
|
| 7373 |
+
"step": 968
|
| 7374 |
+
},
|
| 7375 |
+
{
|
| 7376 |
+
"epoch": 0.898886827458256,
|
| 7377 |
+
"grad_norm": 6.533473014831543,
|
| 7378 |
+
"learning_rate": 6.166861740846297e-07,
|
| 7379 |
+
"loss": 0.2105,
|
| 7380 |
+
"step": 969
|
| 7381 |
+
},
|
| 7382 |
+
{
|
| 7383 |
+
"epoch": 0.8998144712430427,
|
| 7384 |
+
"grad_norm": 3.4495279788970947,
|
| 7385 |
+
"learning_rate": 6.055378247546217e-07,
|
| 7386 |
+
"loss": 0.1222,
|
| 7387 |
+
"step": 970
|
| 7388 |
+
},
|
| 7389 |
+
{
|
| 7390 |
+
"epoch": 0.9007421150278293,
|
| 7391 |
+
"grad_norm": 5.290384769439697,
|
| 7392 |
+
"learning_rate": 5.94488018887448e-07,
|
| 7393 |
+
"loss": 0.2046,
|
| 7394 |
+
"step": 971
|
| 7395 |
+
},
|
| 7396 |
+
{
|
| 7397 |
+
"epoch": 0.9016697588126159,
|
| 7398 |
+
"grad_norm": 6.091614723205566,
|
| 7399 |
+
"learning_rate": 5.835368723903456e-07,
|
| 7400 |
+
"loss": 0.2643,
|
| 7401 |
+
"step": 972
|
| 7402 |
+
},
|
| 7403 |
+
{
|
| 7404 |
+
"epoch": 0.9025974025974026,
|
| 7405 |
+
"grad_norm": 4.488548278808594,
|
| 7406 |
+
"learning_rate": 5.726845001356573e-07,
|
| 7407 |
+
"loss": 0.1263,
|
| 7408 |
+
"step": 973
|
| 7409 |
+
},
|
| 7410 |
+
{
|
| 7411 |
+
"epoch": 0.9035250463821892,
|
| 7412 |
+
"grad_norm": 2.7875099182128906,
|
| 7413 |
+
"learning_rate": 5.619310159596358e-07,
|
| 7414 |
+
"loss": 0.0922,
|
| 7415 |
+
"step": 974
|
| 7416 |
+
},
|
| 7417 |
+
{
|
| 7418 |
+
"epoch": 0.9044526901669759,
|
| 7419 |
+
"grad_norm": 5.558516025543213,
|
| 7420 |
+
"learning_rate": 5.51276532661238e-07,
|
| 7421 |
+
"loss": 0.2045,
|
| 7422 |
+
"step": 975
|
| 7423 |
+
},
|
| 7424 |
+
{
|
| 7425 |
+
"epoch": 0.9053803339517625,
|
| 7426 |
+
"grad_norm": 5.901011943817139,
|
| 7427 |
+
"learning_rate": 5.407211620009545e-07,
|
| 7428 |
+
"loss": 0.1743,
|
| 7429 |
+
"step": 976
|
| 7430 |
+
},
|
| 7431 |
+
{
|
| 7432 |
+
"epoch": 0.9063079777365491,
|
| 7433 |
+
"grad_norm": 3.838674783706665,
|
| 7434 |
+
"learning_rate": 5.30265014699628e-07,
|
| 7435 |
+
"loss": 0.1728,
|
| 7436 |
+
"step": 977
|
| 7437 |
+
},
|
| 7438 |
+
{
|
| 7439 |
+
"epoch": 0.9072356215213359,
|
| 7440 |
+
"grad_norm": 3.811453104019165,
|
| 7441 |
+
"learning_rate": 5.199082004372958e-07,
|
| 7442 |
+
"loss": 0.153,
|
| 7443 |
+
"step": 978
|
| 7444 |
+
},
|
| 7445 |
+
{
|
| 7446 |
+
"epoch": 0.9081632653061225,
|
| 7447 |
+
"grad_norm": 5.14892578125,
|
| 7448 |
+
"learning_rate": 5.096508278520385e-07,
|
| 7449 |
+
"loss": 0.1991,
|
| 7450 |
+
"step": 979
|
| 7451 |
+
},
|
| 7452 |
+
{
|
| 7453 |
+
"epoch": 0.9090909090909091,
|
| 7454 |
+
"grad_norm": 3.6292712688446045,
|
| 7455 |
+
"learning_rate": 4.994930045388414e-07,
|
| 7456 |
+
"loss": 0.1452,
|
| 7457 |
+
"step": 980
|
| 7458 |
+
},
|
| 7459 |
+
{
|
| 7460 |
+
"epoch": 0.9090909090909091,
|
| 7461 |
+
"eval_accuracy": 0.8603104212860311,
|
| 7462 |
+
"eval_f1": 0.7028301886792453,
|
| 7463 |
+
"eval_loss": 0.3034472167491913,
|
| 7464 |
+
"eval_precision": 0.8713450292397661,
|
| 7465 |
+
"eval_recall": 0.5889328063241107,
|
| 7466 |
+
"eval_runtime": 48.7572,
|
| 7467 |
+
"eval_samples_per_second": 5.661,
|
| 7468 |
+
"eval_steps_per_second": 0.185,
|
| 7469 |
+
"step": 980
|
| 7470 |
+
},
|
| 7471 |
+
{
|
| 7472 |
+
"epoch": 0.9100185528756958,
|
| 7473 |
+
"grad_norm": 3.438109874725342,
|
| 7474 |
+
"learning_rate": 4.894348370484648e-07,
|
| 7475 |
+
"loss": 0.1054,
|
| 7476 |
+
"step": 981
|
| 7477 |
+
},
|
| 7478 |
+
{
|
| 7479 |
+
"epoch": 0.9109461966604824,
|
| 7480 |
+
"grad_norm": 5.481462478637695,
|
| 7481 |
+
"learning_rate": 4.794764308863242e-07,
|
| 7482 |
+
"loss": 0.1463,
|
| 7483 |
+
"step": 982
|
| 7484 |
+
},
|
| 7485 |
+
{
|
| 7486 |
+
"epoch": 0.911873840445269,
|
| 7487 |
+
"grad_norm": 6.784456253051758,
|
| 7488 |
+
"learning_rate": 4.696178905113913e-07,
|
| 7489 |
+
"loss": 0.1634,
|
| 7490 |
+
"step": 983
|
| 7491 |
+
},
|
| 7492 |
+
{
|
| 7493 |
+
"epoch": 0.9128014842300557,
|
| 7494 |
+
"grad_norm": 3.902355194091797,
|
| 7495 |
+
"learning_rate": 4.5985931933508757e-07,
|
| 7496 |
+
"loss": 0.1689,
|
| 7497 |
+
"step": 984
|
| 7498 |
+
},
|
| 7499 |
+
{
|
| 7500 |
+
"epoch": 0.9137291280148423,
|
| 7501 |
+
"grad_norm": 4.524623394012451,
|
| 7502 |
+
"learning_rate": 4.502008197202068e-07,
|
| 7503 |
+
"loss": 0.1428,
|
| 7504 |
+
"step": 985
|
| 7505 |
+
},
|
| 7506 |
+
{
|
| 7507 |
+
"epoch": 0.9146567717996289,
|
| 7508 |
+
"grad_norm": 3.976349353790283,
|
| 7509 |
+
"learning_rate": 4.406424929798403e-07,
|
| 7510 |
+
"loss": 0.1864,
|
| 7511 |
+
"step": 986
|
| 7512 |
+
},
|
| 7513 |
+
{
|
| 7514 |
+
"epoch": 0.9155844155844156,
|
| 7515 |
+
"grad_norm": 3.3905527591705322,
|
| 7516 |
+
"learning_rate": 4.3118443937631094e-07,
|
| 7517 |
+
"loss": 0.1719,
|
| 7518 |
+
"step": 987
|
| 7519 |
+
},
|
| 7520 |
+
{
|
| 7521 |
+
"epoch": 0.9165120593692022,
|
| 7522 |
+
"grad_norm": 4.1316938400268555,
|
| 7523 |
+
"learning_rate": 4.218267581201296e-07,
|
| 7524 |
+
"loss": 0.1124,
|
| 7525 |
+
"step": 988
|
| 7526 |
+
},
|
| 7527 |
+
{
|
| 7528 |
+
"epoch": 0.9174397031539888,
|
| 7529 |
+
"grad_norm": 5.6381754875183105,
|
| 7530 |
+
"learning_rate": 4.125695473689406e-07,
|
| 7531 |
+
"loss": 0.1994,
|
| 7532 |
+
"step": 989
|
| 7533 |
+
},
|
| 7534 |
+
{
|
| 7535 |
+
"epoch": 0.9183673469387755,
|
| 7536 |
+
"grad_norm": 7.625948905944824,
|
| 7537 |
+
"learning_rate": 4.034129042265067e-07,
|
| 7538 |
+
"loss": 0.2211,
|
| 7539 |
+
"step": 990
|
| 7540 |
+
},
|
| 7541 |
+
{
|
| 7542 |
+
"epoch": 0.9192949907235621,
|
| 7543 |
+
"grad_norm": 3.567246437072754,
|
| 7544 |
+
"learning_rate": 3.943569247416801e-07,
|
| 7545 |
+
"loss": 0.1359,
|
| 7546 |
+
"step": 991
|
| 7547 |
+
},
|
| 7548 |
+
{
|
| 7549 |
+
"epoch": 0.9202226345083488,
|
| 7550 |
+
"grad_norm": 4.336119174957275,
|
| 7551 |
+
"learning_rate": 3.8540170390740097e-07,
|
| 7552 |
+
"loss": 0.1519,
|
| 7553 |
+
"step": 992
|
| 7554 |
+
},
|
| 7555 |
+
{
|
| 7556 |
+
"epoch": 0.9211502782931354,
|
| 7557 |
+
"grad_norm": 4.9389848709106445,
|
| 7558 |
+
"learning_rate": 3.7654733565969826e-07,
|
| 7559 |
+
"loss": 0.1874,
|
| 7560 |
+
"step": 993
|
| 7561 |
+
},
|
| 7562 |
+
{
|
| 7563 |
+
"epoch": 0.922077922077922,
|
| 7564 |
+
"grad_norm": 3.25769305229187,
|
| 7565 |
+
"learning_rate": 3.67793912876705e-07,
|
| 7566 |
+
"loss": 0.1191,
|
| 7567 |
+
"step": 994
|
| 7568 |
+
},
|
| 7569 |
+
{
|
| 7570 |
+
"epoch": 0.9230055658627088,
|
| 7571 |
+
"grad_norm": 3.4334826469421387,
|
| 7572 |
+
"learning_rate": 3.591415273776855e-07,
|
| 7573 |
+
"loss": 0.1012,
|
| 7574 |
+
"step": 995
|
| 7575 |
+
},
|
| 7576 |
+
{
|
| 7577 |
+
"epoch": 0.9239332096474954,
|
| 7578 |
+
"grad_norm": 3.1981468200683594,
|
| 7579 |
+
"learning_rate": 3.5059026992206645e-07,
|
| 7580 |
+
"loss": 0.0812,
|
| 7581 |
+
"step": 996
|
| 7582 |
+
},
|
| 7583 |
+
{
|
| 7584 |
+
"epoch": 0.924860853432282,
|
| 7585 |
+
"grad_norm": 5.118222236633301,
|
| 7586 |
+
"learning_rate": 3.421402302084953e-07,
|
| 7587 |
+
"loss": 0.1293,
|
| 7588 |
+
"step": 997
|
| 7589 |
+
},
|
| 7590 |
+
{
|
| 7591 |
+
"epoch": 0.9257884972170687,
|
| 7592 |
+
"grad_norm": 4.047184944152832,
|
| 7593 |
+
"learning_rate": 3.3379149687388866e-07,
|
| 7594 |
+
"loss": 0.1723,
|
| 7595 |
+
"step": 998
|
| 7596 |
+
},
|
| 7597 |
+
{
|
| 7598 |
+
"epoch": 0.9267161410018553,
|
| 7599 |
+
"grad_norm": 7.083133220672607,
|
| 7600 |
+
"learning_rate": 3.255441574925089e-07,
|
| 7601 |
+
"loss": 0.2061,
|
| 7602 |
+
"step": 999
|
| 7603 |
+
},
|
| 7604 |
+
{
|
| 7605 |
+
"epoch": 0.9276437847866419,
|
| 7606 |
+
"grad_norm": 2.8097355365753174,
|
| 7607 |
+
"learning_rate": 3.1739829857504235e-07,
|
| 7608 |
+
"loss": 0.086,
|
| 7609 |
+
"step": 1000
|
| 7610 |
+
},
|
| 7611 |
+
{
|
| 7612 |
+
"epoch": 0.9276437847866419,
|
| 7613 |
+
"eval_accuracy": 0.8580931263858093,
|
| 7614 |
+
"eval_f1": 0.6966824644549763,
|
| 7615 |
+
"eval_loss": 0.30505669116973877,
|
| 7616 |
+
"eval_precision": 0.8698224852071006,
|
| 7617 |
+
"eval_recall": 0.5810276679841897,
|
| 7618 |
+
"eval_runtime": 47.8654,
|
| 7619 |
+
"eval_samples_per_second": 5.766,
|
| 7620 |
+
"eval_steps_per_second": 0.188,
|
| 7621 |
+
"step": 1000
|
| 7622 |
}
|
| 7623 |
],
|
| 7624 |
"logging_steps": 1,
|
|
|
|
| 7638 |
"attributes": {}
|
| 7639 |
}
|
| 7640 |
},
|
| 7641 |
+
"total_flos": 3.198993040534405e+17,
|
| 7642 |
"train_batch_size": 8,
|
| 7643 |
"trial_name": null,
|
| 7644 |
"trial_params": null
|