Training in progress, step 1000, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13934748
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cd9f4e1d0bb326b818db1b6faa552753bc4a3328ac93e01b3631a83d08e1c95
|
| 3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13999412
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c1ce66274008394f36d101e20d4378dd480a6f7db7387a58eed60435a8f39a7
|
| 3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b3e7a569d804afc7e9af01c045d344bcf8aa04435a748d8f22d80f77f68191f
|
| 3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84f72d90d6d6f96ffde5e12766b8aa3f0ebf70484ff977b4cc1380cfd2635d82
|
| 3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b7ec10706acfd7aebf2e0313a26ad47f112db6494baa4011866a112fa6459782
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d45fd8c4f5cac20eb0715bd7c3583b8b9d6d50be52eb3b819ead289c264bf4c
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc7bbedf822084a972aaf7dbfdc31778a6b5afdff5f9d51666b28397948c4cf6
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc56dd27c16979078189d0168509b3491fac9a7018e2acd5413b0b5bfb9e62b8
|
| 3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15088
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbefc2a9b5877ac52b5c278c40b832840a445a83b4f45552eae9c8d8fd7025ab
|
| 3 |
size 15088
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15088
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f7ee2bc06c634de7d668e8f27eb2c655185598b0005a48f28db9b8c13871cf8
|
| 3 |
size 15088
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15088
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e89f6ffe398cd010021cbea856f31e9f12c086dc22192dd94cd4139ed13bc428
|
| 3 |
size 15088
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15088
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1cac7e1aa01f996ea4ccf65c0edbca9c2218b27d0fee393e5dadf9e12f0a4ac0
|
| 3 |
size 15088
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca19ec64a3f37f86c1a9f3bd1615be54fe5912d912de79a7d3f808a593a2192d
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -6859,6 +6859,766 @@
|
|
| 6859 |
"eval_samples_per_second": 5.876,
|
| 6860 |
"eval_steps_per_second": 0.201,
|
| 6861 |
"step": 900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6862 |
}
|
| 6863 |
],
|
| 6864 |
"logging_steps": 1,
|
|
@@ -6878,7 +7638,7 @@
|
|
| 6878 |
"attributes": {}
|
| 6879 |
}
|
| 6880 |
},
|
| 6881 |
-
"total_flos":
|
| 6882 |
"train_batch_size": 8,
|
| 6883 |
"trial_name": null,
|
| 6884 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.847457627118644,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 1000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 6859 |
"eval_samples_per_second": 5.876,
|
| 6860 |
"eval_steps_per_second": 0.201,
|
| 6861 |
"step": 900
|
| 6862 |
+
},
|
| 6863 |
+
{
|
| 6864 |
+
"epoch": 0.7635593220338983,
|
| 6865 |
+
"grad_norm": 1.5581014156341553,
|
| 6866 |
+
"learning_rate": 3.216881637303839e-06,
|
| 6867 |
+
"loss": 0.0083,
|
| 6868 |
+
"step": 901
|
| 6869 |
+
},
|
| 6870 |
+
{
|
| 6871 |
+
"epoch": 0.764406779661017,
|
| 6872 |
+
"grad_norm": 1.8738924264907837,
|
| 6873 |
+
"learning_rate": 3.1951753680566143e-06,
|
| 6874 |
+
"loss": 0.0215,
|
| 6875 |
+
"step": 902
|
| 6876 |
+
},
|
| 6877 |
+
{
|
| 6878 |
+
"epoch": 0.7652542372881356,
|
| 6879 |
+
"grad_norm": 0.4267842173576355,
|
| 6880 |
+
"learning_rate": 3.1735286468303563e-06,
|
| 6881 |
+
"loss": 0.0016,
|
| 6882 |
+
"step": 903
|
| 6883 |
+
},
|
| 6884 |
+
{
|
| 6885 |
+
"epoch": 0.7661016949152543,
|
| 6886 |
+
"grad_norm": 1.4631012678146362,
|
| 6887 |
+
"learning_rate": 3.151941663052345e-06,
|
| 6888 |
+
"loss": 0.0058,
|
| 6889 |
+
"step": 904
|
| 6890 |
+
},
|
| 6891 |
+
{
|
| 6892 |
+
"epoch": 0.7669491525423728,
|
| 6893 |
+
"grad_norm": 0.23579372465610504,
|
| 6894 |
+
"learning_rate": 3.130414605627102e-06,
|
| 6895 |
+
"loss": 0.0017,
|
| 6896 |
+
"step": 905
|
| 6897 |
+
},
|
| 6898 |
+
{
|
| 6899 |
+
"epoch": 0.7677966101694915,
|
| 6900 |
+
"grad_norm": 1.0443428754806519,
|
| 6901 |
+
"learning_rate": 3.1089476629347494e-06,
|
| 6902 |
+
"loss": 0.0078,
|
| 6903 |
+
"step": 906
|
| 6904 |
+
},
|
| 6905 |
+
{
|
| 6906 |
+
"epoch": 0.7686440677966102,
|
| 6907 |
+
"grad_norm": 0.8802245259284973,
|
| 6908 |
+
"learning_rate": 3.087541022829347e-06,
|
| 6909 |
+
"loss": 0.0052,
|
| 6910 |
+
"step": 907
|
| 6911 |
+
},
|
| 6912 |
+
{
|
| 6913 |
+
"epoch": 0.7694915254237288,
|
| 6914 |
+
"grad_norm": 0.9820923805236816,
|
| 6915 |
+
"learning_rate": 3.066194872637258e-06,
|
| 6916 |
+
"loss": 0.0022,
|
| 6917 |
+
"step": 908
|
| 6918 |
+
},
|
| 6919 |
+
{
|
| 6920 |
+
"epoch": 0.7703389830508475,
|
| 6921 |
+
"grad_norm": 0.40738704800605774,
|
| 6922 |
+
"learning_rate": 3.04490939915551e-06,
|
| 6923 |
+
"loss": 0.001,
|
| 6924 |
+
"step": 909
|
| 6925 |
+
},
|
| 6926 |
+
{
|
| 6927 |
+
"epoch": 0.7711864406779662,
|
| 6928 |
+
"grad_norm": 2.081660032272339,
|
| 6929 |
+
"learning_rate": 3.023684788650154e-06,
|
| 6930 |
+
"loss": 0.0101,
|
| 6931 |
+
"step": 910
|
| 6932 |
+
},
|
| 6933 |
+
{
|
| 6934 |
+
"epoch": 0.7720338983050847,
|
| 6935 |
+
"grad_norm": 1.3725014925003052,
|
| 6936 |
+
"learning_rate": 3.002521226854641e-06,
|
| 6937 |
+
"loss": 0.0069,
|
| 6938 |
+
"step": 911
|
| 6939 |
+
},
|
| 6940 |
+
{
|
| 6941 |
+
"epoch": 0.7728813559322034,
|
| 6942 |
+
"grad_norm": 2.1171929836273193,
|
| 6943 |
+
"learning_rate": 2.981418898968186e-06,
|
| 6944 |
+
"loss": 0.0139,
|
| 6945 |
+
"step": 912
|
| 6946 |
+
},
|
| 6947 |
+
{
|
| 6948 |
+
"epoch": 0.773728813559322,
|
| 6949 |
+
"grad_norm": 1.6483219861984253,
|
| 6950 |
+
"learning_rate": 2.9603779896541705e-06,
|
| 6951 |
+
"loss": 0.0092,
|
| 6952 |
+
"step": 913
|
| 6953 |
+
},
|
| 6954 |
+
{
|
| 6955 |
+
"epoch": 0.7745762711864407,
|
| 6956 |
+
"grad_norm": 0.36683687567710876,
|
| 6957 |
+
"learning_rate": 2.939398683038497e-06,
|
| 6958 |
+
"loss": 0.0012,
|
| 6959 |
+
"step": 914
|
| 6960 |
+
},
|
| 6961 |
+
{
|
| 6962 |
+
"epoch": 0.7754237288135594,
|
| 6963 |
+
"grad_norm": 1.9361350536346436,
|
| 6964 |
+
"learning_rate": 2.918481162707999e-06,
|
| 6965 |
+
"loss": 0.0093,
|
| 6966 |
+
"step": 915
|
| 6967 |
+
},
|
| 6968 |
+
{
|
| 6969 |
+
"epoch": 0.7762711864406779,
|
| 6970 |
+
"grad_norm": 0.6846543550491333,
|
| 6971 |
+
"learning_rate": 2.89762561170882e-06,
|
| 6972 |
+
"loss": 0.0035,
|
| 6973 |
+
"step": 916
|
| 6974 |
+
},
|
| 6975 |
+
{
|
| 6976 |
+
"epoch": 0.7771186440677966,
|
| 6977 |
+
"grad_norm": 1.052035927772522,
|
| 6978 |
+
"learning_rate": 2.8768322125448265e-06,
|
| 6979 |
+
"loss": 0.0123,
|
| 6980 |
+
"step": 917
|
| 6981 |
+
},
|
| 6982 |
+
{
|
| 6983 |
+
"epoch": 0.7779661016949152,
|
| 6984 |
+
"grad_norm": 0.6025975942611694,
|
| 6985 |
+
"learning_rate": 2.856101147175998e-06,
|
| 6986 |
+
"loss": 0.0035,
|
| 6987 |
+
"step": 918
|
| 6988 |
+
},
|
| 6989 |
+
{
|
| 6990 |
+
"epoch": 0.7788135593220339,
|
| 6991 |
+
"grad_norm": 1.8254081010818481,
|
| 6992 |
+
"learning_rate": 2.8354325970168483e-06,
|
| 6993 |
+
"loss": 0.0175,
|
| 6994 |
+
"step": 919
|
| 6995 |
+
},
|
| 6996 |
+
{
|
| 6997 |
+
"epoch": 0.7796610169491526,
|
| 6998 |
+
"grad_norm": 0.6324992179870605,
|
| 6999 |
+
"learning_rate": 2.814826742934823e-06,
|
| 7000 |
+
"loss": 0.0027,
|
| 7001 |
+
"step": 920
|
| 7002 |
+
},
|
| 7003 |
+
{
|
| 7004 |
+
"epoch": 0.7796610169491526,
|
| 7005 |
+
"eval_accuracy": 1.0,
|
| 7006 |
+
"eval_f1": 1.0,
|
| 7007 |
+
"eval_loss": 7.932856533443555e-05,
|
| 7008 |
+
"eval_precision": 1.0,
|
| 7009 |
+
"eval_recall": 1.0,
|
| 7010 |
+
"eval_runtime": 50.5195,
|
| 7011 |
+
"eval_samples_per_second": 5.78,
|
| 7012 |
+
"eval_steps_per_second": 0.198,
|
| 7013 |
+
"step": 920
|
| 7014 |
+
},
|
| 7015 |
+
{
|
| 7016 |
+
"epoch": 0.7805084745762711,
|
| 7017 |
+
"grad_norm": 4.134251117706299,
|
| 7018 |
+
"learning_rate": 2.794283765248722e-06,
|
| 7019 |
+
"loss": 0.0218,
|
| 7020 |
+
"step": 921
|
| 7021 |
+
},
|
| 7022 |
+
{
|
| 7023 |
+
"epoch": 0.7813559322033898,
|
| 7024 |
+
"grad_norm": 1.057350754737854,
|
| 7025 |
+
"learning_rate": 2.7738038437271288e-06,
|
| 7026 |
+
"loss": 0.0032,
|
| 7027 |
+
"step": 922
|
| 7028 |
+
},
|
| 7029 |
+
{
|
| 7030 |
+
"epoch": 0.7822033898305085,
|
| 7031 |
+
"grad_norm": 0.7094781994819641,
|
| 7032 |
+
"learning_rate": 2.7533871575868275e-06,
|
| 7033 |
+
"loss": 0.0028,
|
| 7034 |
+
"step": 923
|
| 7035 |
+
},
|
| 7036 |
+
{
|
| 7037 |
+
"epoch": 0.7830508474576271,
|
| 7038 |
+
"grad_norm": 2.3617732524871826,
|
| 7039 |
+
"learning_rate": 2.733033885491241e-06,
|
| 7040 |
+
"loss": 0.0126,
|
| 7041 |
+
"step": 924
|
| 7042 |
+
},
|
| 7043 |
+
{
|
| 7044 |
+
"epoch": 0.7838983050847458,
|
| 7045 |
+
"grad_norm": 0.1944715678691864,
|
| 7046 |
+
"learning_rate": 2.7127442055488617e-06,
|
| 7047 |
+
"loss": 0.0007,
|
| 7048 |
+
"step": 925
|
| 7049 |
+
},
|
| 7050 |
+
{
|
| 7051 |
+
"epoch": 0.7847457627118644,
|
| 7052 |
+
"grad_norm": 0.6528817415237427,
|
| 7053 |
+
"learning_rate": 2.6925182953117022e-06,
|
| 7054 |
+
"loss": 0.0046,
|
| 7055 |
+
"step": 926
|
| 7056 |
+
},
|
| 7057 |
+
{
|
| 7058 |
+
"epoch": 0.785593220338983,
|
| 7059 |
+
"grad_norm": 0.31304916739463806,
|
| 7060 |
+
"learning_rate": 2.67235633177373e-06,
|
| 7061 |
+
"loss": 0.0016,
|
| 7062 |
+
"step": 927
|
| 7063 |
+
},
|
| 7064 |
+
{
|
| 7065 |
+
"epoch": 0.7864406779661017,
|
| 7066 |
+
"grad_norm": 0.75702303647995,
|
| 7067 |
+
"learning_rate": 2.6522584913693295e-06,
|
| 7068 |
+
"loss": 0.0047,
|
| 7069 |
+
"step": 928
|
| 7070 |
+
},
|
| 7071 |
+
{
|
| 7072 |
+
"epoch": 0.7872881355932203,
|
| 7073 |
+
"grad_norm": 1.600816011428833,
|
| 7074 |
+
"learning_rate": 2.6322249499717477e-06,
|
| 7075 |
+
"loss": 0.0062,
|
| 7076 |
+
"step": 929
|
| 7077 |
+
},
|
| 7078 |
+
{
|
| 7079 |
+
"epoch": 0.788135593220339,
|
| 7080 |
+
"grad_norm": 1.592640995979309,
|
| 7081 |
+
"learning_rate": 2.6122558828915647e-06,
|
| 7082 |
+
"loss": 0.0064,
|
| 7083 |
+
"step": 930
|
| 7084 |
+
},
|
| 7085 |
+
{
|
| 7086 |
+
"epoch": 0.7889830508474577,
|
| 7087 |
+
"grad_norm": 2.1126153469085693,
|
| 7088 |
+
"learning_rate": 2.5923514648751537e-06,
|
| 7089 |
+
"loss": 0.0265,
|
| 7090 |
+
"step": 931
|
| 7091 |
+
},
|
| 7092 |
+
{
|
| 7093 |
+
"epoch": 0.7898305084745763,
|
| 7094 |
+
"grad_norm": 1.4339178800582886,
|
| 7095 |
+
"learning_rate": 2.572511870103149e-06,
|
| 7096 |
+
"loss": 0.0054,
|
| 7097 |
+
"step": 932
|
| 7098 |
+
},
|
| 7099 |
+
{
|
| 7100 |
+
"epoch": 0.7906779661016949,
|
| 7101 |
+
"grad_norm": 2.253162145614624,
|
| 7102 |
+
"learning_rate": 2.55273727218894e-06,
|
| 7103 |
+
"loss": 0.0321,
|
| 7104 |
+
"step": 933
|
| 7105 |
+
},
|
| 7106 |
+
{
|
| 7107 |
+
"epoch": 0.7915254237288135,
|
| 7108 |
+
"grad_norm": 1.1612133979797363,
|
| 7109 |
+
"learning_rate": 2.533027844177123e-06,
|
| 7110 |
+
"loss": 0.0062,
|
| 7111 |
+
"step": 934
|
| 7112 |
+
},
|
| 7113 |
+
{
|
| 7114 |
+
"epoch": 0.7923728813559322,
|
| 7115 |
+
"grad_norm": 1.0363982915878296,
|
| 7116 |
+
"learning_rate": 2.5133837585420084e-06,
|
| 7117 |
+
"loss": 0.0053,
|
| 7118 |
+
"step": 935
|
| 7119 |
+
},
|
| 7120 |
+
{
|
| 7121 |
+
"epoch": 0.7932203389830509,
|
| 7122 |
+
"grad_norm": 1.3332302570343018,
|
| 7123 |
+
"learning_rate": 2.4938051871861046e-06,
|
| 7124 |
+
"loss": 0.0072,
|
| 7125 |
+
"step": 936
|
| 7126 |
+
},
|
| 7127 |
+
{
|
| 7128 |
+
"epoch": 0.7940677966101695,
|
| 7129 |
+
"grad_norm": 0.3061300218105316,
|
| 7130 |
+
"learning_rate": 2.4742923014386154e-06,
|
| 7131 |
+
"loss": 0.0015,
|
| 7132 |
+
"step": 937
|
| 7133 |
+
},
|
| 7134 |
+
{
|
| 7135 |
+
"epoch": 0.7949152542372881,
|
| 7136 |
+
"grad_norm": 2.649893045425415,
|
| 7137 |
+
"learning_rate": 2.4548452720539375e-06,
|
| 7138 |
+
"loss": 0.0238,
|
| 7139 |
+
"step": 938
|
| 7140 |
+
},
|
| 7141 |
+
{
|
| 7142 |
+
"epoch": 0.7957627118644067,
|
| 7143 |
+
"grad_norm": 0.9358623623847961,
|
| 7144 |
+
"learning_rate": 2.435464269210167e-06,
|
| 7145 |
+
"loss": 0.0036,
|
| 7146 |
+
"step": 939
|
| 7147 |
+
},
|
| 7148 |
+
{
|
| 7149 |
+
"epoch": 0.7966101694915254,
|
| 7150 |
+
"grad_norm": 1.4924583435058594,
|
| 7151 |
+
"learning_rate": 2.4161494625076164e-06,
|
| 7152 |
+
"loss": 0.0105,
|
| 7153 |
+
"step": 940
|
| 7154 |
+
},
|
| 7155 |
+
{
|
| 7156 |
+
"epoch": 0.7966101694915254,
|
| 7157 |
+
"eval_accuracy": 1.0,
|
| 7158 |
+
"eval_f1": 1.0,
|
| 7159 |
+
"eval_loss": 0.00010792797547765076,
|
| 7160 |
+
"eval_precision": 1.0,
|
| 7161 |
+
"eval_recall": 1.0,
|
| 7162 |
+
"eval_runtime": 49.837,
|
| 7163 |
+
"eval_samples_per_second": 5.859,
|
| 7164 |
+
"eval_steps_per_second": 0.201,
|
| 7165 |
+
"step": 940
|
| 7166 |
+
},
|
| 7167 |
+
{
|
| 7168 |
+
"epoch": 0.7974576271186441,
|
| 7169 |
+
"grad_norm": 0.9415515661239624,
|
| 7170 |
+
"learning_rate": 2.3969010209673215e-06,
|
| 7171 |
+
"loss": 0.0031,
|
| 7172 |
+
"step": 941
|
| 7173 |
+
},
|
| 7174 |
+
{
|
| 7175 |
+
"epoch": 0.7983050847457627,
|
| 7176 |
+
"grad_norm": 1.4553923606872559,
|
| 7177 |
+
"learning_rate": 2.3777191130295673e-06,
|
| 7178 |
+
"loss": 0.008,
|
| 7179 |
+
"step": 942
|
| 7180 |
+
},
|
| 7181 |
+
{
|
| 7182 |
+
"epoch": 0.7991525423728814,
|
| 7183 |
+
"grad_norm": 0.9974135160446167,
|
| 7184 |
+
"learning_rate": 2.3586039065524113e-06,
|
| 7185 |
+
"loss": 0.0037,
|
| 7186 |
+
"step": 943
|
| 7187 |
+
},
|
| 7188 |
+
{
|
| 7189 |
+
"epoch": 0.8,
|
| 7190 |
+
"grad_norm": 1.052581548690796,
|
| 7191 |
+
"learning_rate": 2.339555568810221e-06,
|
| 7192 |
+
"loss": 0.0057,
|
| 7193 |
+
"step": 944
|
| 7194 |
+
},
|
| 7195 |
+
{
|
| 7196 |
+
"epoch": 0.8008474576271186,
|
| 7197 |
+
"grad_norm": 0.27318713068962097,
|
| 7198 |
+
"learning_rate": 2.3205742664922006e-06,
|
| 7199 |
+
"loss": 0.0011,
|
| 7200 |
+
"step": 945
|
| 7201 |
+
},
|
| 7202 |
+
{
|
| 7203 |
+
"epoch": 0.8016949152542373,
|
| 7204 |
+
"grad_norm": 2.6839377880096436,
|
| 7205 |
+
"learning_rate": 2.3016601657009364e-06,
|
| 7206 |
+
"loss": 0.0192,
|
| 7207 |
+
"step": 946
|
| 7208 |
+
},
|
| 7209 |
+
{
|
| 7210 |
+
"epoch": 0.8025423728813559,
|
| 7211 |
+
"grad_norm": 0.8619096279144287,
|
| 7212 |
+
"learning_rate": 2.282813431950952e-06,
|
| 7213 |
+
"loss": 0.0026,
|
| 7214 |
+
"step": 947
|
| 7215 |
+
},
|
| 7216 |
+
{
|
| 7217 |
+
"epoch": 0.8033898305084746,
|
| 7218 |
+
"grad_norm": 2.3613054752349854,
|
| 7219 |
+
"learning_rate": 2.264034230167246e-06,
|
| 7220 |
+
"loss": 0.0161,
|
| 7221 |
+
"step": 948
|
| 7222 |
+
},
|
| 7223 |
+
{
|
| 7224 |
+
"epoch": 0.8042372881355933,
|
| 7225 |
+
"grad_norm": 1.840660572052002,
|
| 7226 |
+
"learning_rate": 2.245322724683854e-06,
|
| 7227 |
+
"loss": 0.0191,
|
| 7228 |
+
"step": 949
|
| 7229 |
+
},
|
| 7230 |
+
{
|
| 7231 |
+
"epoch": 0.8050847457627118,
|
| 7232 |
+
"grad_norm": 1.5182996988296509,
|
| 7233 |
+
"learning_rate": 2.2266790792424096e-06,
|
| 7234 |
+
"loss": 0.0083,
|
| 7235 |
+
"step": 950
|
| 7236 |
+
},
|
| 7237 |
+
{
|
| 7238 |
+
"epoch": 0.8059322033898305,
|
| 7239 |
+
"grad_norm": 1.8400460481643677,
|
| 7240 |
+
"learning_rate": 2.208103456990719e-06,
|
| 7241 |
+
"loss": 0.0136,
|
| 7242 |
+
"step": 951
|
| 7243 |
+
},
|
| 7244 |
+
{
|
| 7245 |
+
"epoch": 0.8067796610169492,
|
| 7246 |
+
"grad_norm": 1.5054808855056763,
|
| 7247 |
+
"learning_rate": 2.1895960204813194e-06,
|
| 7248 |
+
"loss": 0.0101,
|
| 7249 |
+
"step": 952
|
| 7250 |
+
},
|
| 7251 |
+
{
|
| 7252 |
+
"epoch": 0.8076271186440678,
|
| 7253 |
+
"grad_norm": 1.5928698778152466,
|
| 7254 |
+
"learning_rate": 2.1711569316700774e-06,
|
| 7255 |
+
"loss": 0.0118,
|
| 7256 |
+
"step": 953
|
| 7257 |
+
},
|
| 7258 |
+
{
|
| 7259 |
+
"epoch": 0.8084745762711865,
|
| 7260 |
+
"grad_norm": 1.162479281425476,
|
| 7261 |
+
"learning_rate": 2.1527863519147474e-06,
|
| 7262 |
+
"loss": 0.0068,
|
| 7263 |
+
"step": 954
|
| 7264 |
+
},
|
| 7265 |
+
{
|
| 7266 |
+
"epoch": 0.809322033898305,
|
| 7267 |
+
"grad_norm": 1.07491135597229,
|
| 7268 |
+
"learning_rate": 2.1344844419735757e-06,
|
| 7269 |
+
"loss": 0.0025,
|
| 7270 |
+
"step": 955
|
| 7271 |
+
},
|
| 7272 |
+
{
|
| 7273 |
+
"epoch": 0.8101694915254237,
|
| 7274 |
+
"grad_norm": 0.22395382821559906,
|
| 7275 |
+
"learning_rate": 2.116251362003887e-06,
|
| 7276 |
+
"loss": 0.0007,
|
| 7277 |
+
"step": 956
|
| 7278 |
+
},
|
| 7279 |
+
{
|
| 7280 |
+
"epoch": 0.8110169491525424,
|
| 7281 |
+
"grad_norm": 0.4018426239490509,
|
| 7282 |
+
"learning_rate": 2.098087271560687e-06,
|
| 7283 |
+
"loss": 0.0026,
|
| 7284 |
+
"step": 957
|
| 7285 |
+
},
|
| 7286 |
+
{
|
| 7287 |
+
"epoch": 0.811864406779661,
|
| 7288 |
+
"grad_norm": 1.074708104133606,
|
| 7289 |
+
"learning_rate": 2.079992329595263e-06,
|
| 7290 |
+
"loss": 0.0035,
|
| 7291 |
+
"step": 958
|
| 7292 |
+
},
|
| 7293 |
+
{
|
| 7294 |
+
"epoch": 0.8127118644067797,
|
| 7295 |
+
"grad_norm": 1.0309704542160034,
|
| 7296 |
+
"learning_rate": 2.0619666944537954e-06,
|
| 7297 |
+
"loss": 0.0041,
|
| 7298 |
+
"step": 959
|
| 7299 |
+
},
|
| 7300 |
+
{
|
| 7301 |
+
"epoch": 0.8135593220338984,
|
| 7302 |
+
"grad_norm": 2.1775588989257812,
|
| 7303 |
+
"learning_rate": 2.044010523875969e-06,
|
| 7304 |
+
"loss": 0.0157,
|
| 7305 |
+
"step": 960
|
| 7306 |
+
},
|
| 7307 |
+
{
|
| 7308 |
+
"epoch": 0.8135593220338984,
|
| 7309 |
+
"eval_accuracy": 1.0,
|
| 7310 |
+
"eval_f1": 1.0,
|
| 7311 |
+
"eval_loss": 9.212108125211671e-05,
|
| 7312 |
+
"eval_precision": 1.0,
|
| 7313 |
+
"eval_recall": 1.0,
|
| 7314 |
+
"eval_runtime": 49.5926,
|
| 7315 |
+
"eval_samples_per_second": 5.888,
|
| 7316 |
+
"eval_steps_per_second": 0.202,
|
| 7317 |
+
"step": 960
|
| 7318 |
+
},
|
| 7319 |
+
{
|
| 7320 |
+
"epoch": 0.8144067796610169,
|
| 7321 |
+
"grad_norm": 0.222603902220726,
|
| 7322 |
+
"learning_rate": 2.0261239749935966e-06,
|
| 7323 |
+
"loss": 0.0009,
|
| 7324 |
+
"step": 961
|
| 7325 |
+
},
|
| 7326 |
+
{
|
| 7327 |
+
"epoch": 0.8152542372881356,
|
| 7328 |
+
"grad_norm": 0.21753355860710144,
|
| 7329 |
+
"learning_rate": 2.0083072043292406e-06,
|
| 7330 |
+
"loss": 0.0007,
|
| 7331 |
+
"step": 962
|
| 7332 |
+
},
|
| 7333 |
+
{
|
| 7334 |
+
"epoch": 0.8161016949152542,
|
| 7335 |
+
"grad_norm": 1.3669072389602661,
|
| 7336 |
+
"learning_rate": 1.9905603677948425e-06,
|
| 7337 |
+
"loss": 0.0065,
|
| 7338 |
+
"step": 963
|
| 7339 |
+
},
|
| 7340 |
+
{
|
| 7341 |
+
"epoch": 0.8169491525423729,
|
| 7342 |
+
"grad_norm": 2.4227099418640137,
|
| 7343 |
+
"learning_rate": 1.972883620690366e-06,
|
| 7344 |
+
"loss": 0.0253,
|
| 7345 |
+
"step": 964
|
| 7346 |
+
},
|
| 7347 |
+
{
|
| 7348 |
+
"epoch": 0.8177966101694916,
|
| 7349 |
+
"grad_norm": 0.42630961537361145,
|
| 7350 |
+
"learning_rate": 1.955277117702424e-06,
|
| 7351 |
+
"loss": 0.0013,
|
| 7352 |
+
"step": 965
|
| 7353 |
+
},
|
| 7354 |
+
{
|
| 7355 |
+
"epoch": 0.8186440677966101,
|
| 7356 |
+
"grad_norm": 1.9701416492462158,
|
| 7357 |
+
"learning_rate": 1.9377410129029407e-06,
|
| 7358 |
+
"loss": 0.011,
|
| 7359 |
+
"step": 966
|
| 7360 |
+
},
|
| 7361 |
+
{
|
| 7362 |
+
"epoch": 0.8194915254237288,
|
| 7363 |
+
"grad_norm": 2.1445109844207764,
|
| 7364 |
+
"learning_rate": 1.920275459747796e-06,
|
| 7365 |
+
"loss": 0.0132,
|
| 7366 |
+
"step": 967
|
| 7367 |
+
},
|
| 7368 |
+
{
|
| 7369 |
+
"epoch": 0.8203389830508474,
|
| 7370 |
+
"grad_norm": 1.7752200365066528,
|
| 7371 |
+
"learning_rate": 1.902880611075477e-06,
|
| 7372 |
+
"loss": 0.0069,
|
| 7373 |
+
"step": 968
|
| 7374 |
+
},
|
| 7375 |
+
{
|
| 7376 |
+
"epoch": 0.8211864406779661,
|
| 7377 |
+
"grad_norm": 0.9991908669471741,
|
| 7378 |
+
"learning_rate": 1.8855566191057538e-06,
|
| 7379 |
+
"loss": 0.0043,
|
| 7380 |
+
"step": 969
|
| 7381 |
+
},
|
| 7382 |
+
{
|
| 7383 |
+
"epoch": 0.8220338983050848,
|
| 7384 |
+
"grad_norm": 1.4875959157943726,
|
| 7385 |
+
"learning_rate": 1.868303635438332e-06,
|
| 7386 |
+
"loss": 0.0118,
|
| 7387 |
+
"step": 970
|
| 7388 |
+
},
|
| 7389 |
+
{
|
| 7390 |
+
"epoch": 0.8228813559322034,
|
| 7391 |
+
"grad_norm": 0.8871830701828003,
|
| 7392 |
+
"learning_rate": 1.8511218110515428e-06,
|
| 7393 |
+
"loss": 0.0042,
|
| 7394 |
+
"step": 971
|
| 7395 |
+
},
|
| 7396 |
+
{
|
| 7397 |
+
"epoch": 0.823728813559322,
|
| 7398 |
+
"grad_norm": 1.4304015636444092,
|
| 7399 |
+
"learning_rate": 1.8340112963009993e-06,
|
| 7400 |
+
"loss": 0.0085,
|
| 7401 |
+
"step": 972
|
| 7402 |
+
},
|
| 7403 |
+
{
|
| 7404 |
+
"epoch": 0.8245762711864407,
|
| 7405 |
+
"grad_norm": 1.7509040832519531,
|
| 7406 |
+
"learning_rate": 1.81697224091831e-06,
|
| 7407 |
+
"loss": 0.0181,
|
| 7408 |
+
"step": 973
|
| 7409 |
+
},
|
| 7410 |
+
{
|
| 7411 |
+
"epoch": 0.8254237288135593,
|
| 7412 |
+
"grad_norm": 0.6783941388130188,
|
| 7413 |
+
"learning_rate": 1.8000047940097453e-06,
|
| 7414 |
+
"loss": 0.0039,
|
| 7415 |
+
"step": 974
|
| 7416 |
+
},
|
| 7417 |
+
{
|
| 7418 |
+
"epoch": 0.826271186440678,
|
| 7419 |
+
"grad_norm": 0.9287757873535156,
|
| 7420 |
+
"learning_rate": 1.7831091040549397e-06,
|
| 7421 |
+
"loss": 0.0097,
|
| 7422 |
+
"step": 975
|
| 7423 |
+
},
|
| 7424 |
+
{
|
| 7425 |
+
"epoch": 0.8271186440677966,
|
| 7426 |
+
"grad_norm": 1.7914001941680908,
|
| 7427 |
+
"learning_rate": 1.7662853189055951e-06,
|
| 7428 |
+
"loss": 0.0152,
|
| 7429 |
+
"step": 976
|
| 7430 |
+
},
|
| 7431 |
+
{
|
| 7432 |
+
"epoch": 0.8279661016949152,
|
| 7433 |
+
"grad_norm": 0.4140421450138092,
|
| 7434 |
+
"learning_rate": 1.7495335857841855e-06,
|
| 7435 |
+
"loss": 0.0023,
|
| 7436 |
+
"step": 977
|
| 7437 |
+
},
|
| 7438 |
+
{
|
| 7439 |
+
"epoch": 0.8288135593220339,
|
| 7440 |
+
"grad_norm": 0.8546230792999268,
|
| 7441 |
+
"learning_rate": 1.7328540512826664e-06,
|
| 7442 |
+
"loss": 0.0077,
|
| 7443 |
+
"step": 978
|
| 7444 |
+
},
|
| 7445 |
+
{
|
| 7446 |
+
"epoch": 0.8296610169491525,
|
| 7447 |
+
"grad_norm": 1.1925534009933472,
|
| 7448 |
+
"learning_rate": 1.7162468613611937e-06,
|
| 7449 |
+
"loss": 0.0077,
|
| 7450 |
+
"step": 979
|
| 7451 |
+
},
|
| 7452 |
+
{
|
| 7453 |
+
"epoch": 0.8305084745762712,
|
| 7454 |
+
"grad_norm": 1.0941600799560547,
|
| 7455 |
+
"learning_rate": 1.699712161346846e-06,
|
| 7456 |
+
"loss": 0.0082,
|
| 7457 |
+
"step": 980
|
| 7458 |
+
},
|
| 7459 |
+
{
|
| 7460 |
+
"epoch": 0.8305084745762712,
|
| 7461 |
+
"eval_accuracy": 1.0,
|
| 7462 |
+
"eval_f1": 1.0,
|
| 7463 |
+
"eval_loss": 0.00011068069579778239,
|
| 7464 |
+
"eval_precision": 1.0,
|
| 7465 |
+
"eval_recall": 1.0,
|
| 7466 |
+
"eval_runtime": 49.9161,
|
| 7467 |
+
"eval_samples_per_second": 5.85,
|
| 7468 |
+
"eval_steps_per_second": 0.2,
|
| 7469 |
+
"step": 980
|
| 7470 |
+
},
|
| 7471 |
+
{
|
| 7472 |
+
"epoch": 0.8313559322033899,
|
| 7473 |
+
"grad_norm": 3.0363481044769287,
|
| 7474 |
+
"learning_rate": 1.6832500959323605e-06,
|
| 7475 |
+
"loss": 0.0313,
|
| 7476 |
+
"step": 981
|
| 7477 |
+
},
|
| 7478 |
+
{
|
| 7479 |
+
"epoch": 0.8322033898305085,
|
| 7480 |
+
"grad_norm": 1.8849022388458252,
|
| 7481 |
+
"learning_rate": 1.6668608091748495e-06,
|
| 7482 |
+
"loss": 0.007,
|
| 7483 |
+
"step": 982
|
| 7484 |
+
},
|
| 7485 |
+
{
|
| 7486 |
+
"epoch": 0.8330508474576271,
|
| 7487 |
+
"grad_norm": 0.2518068253993988,
|
| 7488 |
+
"learning_rate": 1.6505444444945584e-06,
|
| 7489 |
+
"loss": 0.0009,
|
| 7490 |
+
"step": 983
|
| 7491 |
+
},
|
| 7492 |
+
{
|
| 7493 |
+
"epoch": 0.8338983050847457,
|
| 7494 |
+
"grad_norm": 0.548155665397644,
|
| 7495 |
+
"learning_rate": 1.6343011446735925e-06,
|
| 7496 |
+
"loss": 0.0024,
|
| 7497 |
+
"step": 984
|
| 7498 |
+
},
|
| 7499 |
+
{
|
| 7500 |
+
"epoch": 0.8347457627118644,
|
| 7501 |
+
"grad_norm": 1.4121159315109253,
|
| 7502 |
+
"learning_rate": 1.6181310518546856e-06,
|
| 7503 |
+
"loss": 0.0082,
|
| 7504 |
+
"step": 985
|
| 7505 |
+
},
|
| 7506 |
+
{
|
| 7507 |
+
"epoch": 0.8355932203389831,
|
| 7508 |
+
"grad_norm": 3.7406160831451416,
|
| 7509 |
+
"learning_rate": 1.6020343075399425e-06,
|
| 7510 |
+
"loss": 0.0086,
|
| 7511 |
+
"step": 986
|
| 7512 |
+
},
|
| 7513 |
+
{
|
| 7514 |
+
"epoch": 0.8364406779661017,
|
| 7515 |
+
"grad_norm": 0.4382129907608032,
|
| 7516 |
+
"learning_rate": 1.5860110525896143e-06,
|
| 7517 |
+
"loss": 0.0032,
|
| 7518 |
+
"step": 987
|
| 7519 |
+
},
|
| 7520 |
+
{
|
| 7521 |
+
"epoch": 0.8372881355932204,
|
| 7522 |
+
"grad_norm": 1.0554977655410767,
|
| 7523 |
+
"learning_rate": 1.5700614272208492e-06,
|
| 7524 |
+
"loss": 0.0042,
|
| 7525 |
+
"step": 988
|
| 7526 |
+
},
|
| 7527 |
+
{
|
| 7528 |
+
"epoch": 0.838135593220339,
|
| 7529 |
+
"grad_norm": 0.5351442694664001,
|
| 7530 |
+
"learning_rate": 1.5541855710064757e-06,
|
| 7531 |
+
"loss": 0.0021,
|
| 7532 |
+
"step": 989
|
| 7533 |
+
},
|
| 7534 |
+
{
|
| 7535 |
+
"epoch": 0.8389830508474576,
|
| 7536 |
+
"grad_norm": 1.2155871391296387,
|
| 7537 |
+
"learning_rate": 1.5383836228737815e-06,
|
| 7538 |
+
"loss": 0.0059,
|
| 7539 |
+
"step": 990
|
| 7540 |
+
},
|
| 7541 |
+
{
|
| 7542 |
+
"epoch": 0.8398305084745763,
|
| 7543 |
+
"grad_norm": 1.8322945833206177,
|
| 7544 |
+
"learning_rate": 1.522655721103291e-06,
|
| 7545 |
+
"loss": 0.0069,
|
| 7546 |
+
"step": 991
|
| 7547 |
+
},
|
| 7548 |
+
{
|
| 7549 |
+
"epoch": 0.8406779661016949,
|
| 7550 |
+
"grad_norm": 1.3039281368255615,
|
| 7551 |
+
"learning_rate": 1.5070020033275655e-06,
|
| 7552 |
+
"loss": 0.0102,
|
| 7553 |
+
"step": 992
|
| 7554 |
+
},
|
| 7555 |
+
{
|
| 7556 |
+
"epoch": 0.8415254237288136,
|
| 7557 |
+
"grad_norm": 1.6748837232589722,
|
| 7558 |
+
"learning_rate": 1.4914226065299886e-06,
|
| 7559 |
+
"loss": 0.0059,
|
| 7560 |
+
"step": 993
|
| 7561 |
+
},
|
| 7562 |
+
{
|
| 7563 |
+
"epoch": 0.8423728813559322,
|
| 7564 |
+
"grad_norm": 0.4845666289329529,
|
| 7565 |
+
"learning_rate": 1.475917667043575e-06,
|
| 7566 |
+
"loss": 0.0019,
|
| 7567 |
+
"step": 994
|
| 7568 |
+
},
|
| 7569 |
+
{
|
| 7570 |
+
"epoch": 0.8432203389830508,
|
| 7571 |
+
"grad_norm": 0.8964245915412903,
|
| 7572 |
+
"learning_rate": 1.4604873205497727e-06,
|
| 7573 |
+
"loss": 0.002,
|
| 7574 |
+
"step": 995
|
| 7575 |
+
},
|
| 7576 |
+
{
|
| 7577 |
+
"epoch": 0.8440677966101695,
|
| 7578 |
+
"grad_norm": 1.370054841041565,
|
| 7579 |
+
"learning_rate": 1.445131702077277e-06,
|
| 7580 |
+
"loss": 0.0086,
|
| 7581 |
+
"step": 996
|
| 7582 |
+
},
|
| 7583 |
+
{
|
| 7584 |
+
"epoch": 0.8449152542372881,
|
| 7585 |
+
"grad_norm": 2.0046818256378174,
|
| 7586 |
+
"learning_rate": 1.4298509460008491e-06,
|
| 7587 |
+
"loss": 0.0311,
|
| 7588 |
+
"step": 997
|
| 7589 |
+
},
|
| 7590 |
+
{
|
| 7591 |
+
"epoch": 0.8457627118644068,
|
| 7592 |
+
"grad_norm": 1.3406736850738525,
|
| 7593 |
+
"learning_rate": 1.4146451860401445e-06,
|
| 7594 |
+
"loss": 0.0075,
|
| 7595 |
+
"step": 998
|
| 7596 |
+
},
|
| 7597 |
+
{
|
| 7598 |
+
"epoch": 0.8466101694915255,
|
| 7599 |
+
"grad_norm": 0.8433687090873718,
|
| 7600 |
+
"learning_rate": 1.3995145552585321e-06,
|
| 7601 |
+
"loss": 0.0047,
|
| 7602 |
+
"step": 999
|
| 7603 |
+
},
|
| 7604 |
+
{
|
| 7605 |
+
"epoch": 0.847457627118644,
|
| 7606 |
+
"grad_norm": 2.1373324394226074,
|
| 7607 |
+
"learning_rate": 1.3844591860619382e-06,
|
| 7608 |
+
"loss": 0.0084,
|
| 7609 |
+
"step": 1000
|
| 7610 |
+
},
|
| 7611 |
+
{
|
| 7612 |
+
"epoch": 0.847457627118644,
|
| 7613 |
+
"eval_accuracy": 1.0,
|
| 7614 |
+
"eval_f1": 1.0,
|
| 7615 |
+
"eval_loss": 0.0001222841819981113,
|
| 7616 |
+
"eval_precision": 1.0,
|
| 7617 |
+
"eval_recall": 1.0,
|
| 7618 |
+
"eval_runtime": 50.0901,
|
| 7619 |
+
"eval_samples_per_second": 5.83,
|
| 7620 |
+
"eval_steps_per_second": 0.2,
|
| 7621 |
+
"step": 1000
|
| 7622 |
}
|
| 7623 |
],
|
| 7624 |
"logging_steps": 1,
|
|
|
|
| 7638 |
"attributes": {}
|
| 7639 |
}
|
| 7640 |
},
|
| 7641 |
+
"total_flos": 3.076975196163277e+17,
|
| 7642 |
"train_batch_size": 8,
|
| 7643 |
"trial_name": null,
|
| 7644 |
"trial_params": null
|