Training in progress, step 6700, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13934748
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:965566f8b9a741a6f2801dc78e4fbc5ac70240c8d6d7b5570ba0182bcd9674e9
|
| 3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13999412
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7212ed89906b0804da8eba1f5c500d042a2a31b594b63c7afc77b7fca62b4f05
|
| 3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93ad7abb665289229475a0dc55018b7ca6c10b70ef45f15c0b9b8f137cc5c291
|
| 3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a28d9e945552a66feca51fc9780b294ee621de58c9db83d3aefe7462105d0d49
|
| 3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49977e9fb46265ba81ad5ce120a7b938b5fafa454d7bb632a57a63f975e9f54a
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a633db66552fb787cb1151b9a3e2e30b0293e84603ef7d545351fc947c5f219
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f2168060d5d243a5dda1e0bc7482749ed6c7fc4cb39ff029c8a95d29643dcf6
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:581e13951882957811a470d66e41e45bbc9bb66544ca2d6e3568683cc9866887
|
| 3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:091b7cd663405f9e474cf640b71ae20df31b45b8cceb2d74232e5c4232ae67f5
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:951da6c961efeea8abd4771cf6f335146152fd6e811aedd9376cfbaf0b5c2661
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48de50c66a37a5de2f7b1873acf38375f58754d859c5eb82d7fe707070cddd0c
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2136ce865bd31b51bdee33783218e662d324835501f13ef2cf89d65f472e3f07
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f3d454f05c0bda87b3125802c8738baab69763f7e63757668c9f80a78618863
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -50179,6 +50179,766 @@
|
|
| 50179 |
"eval_samples_per_second": 5.723,
|
| 50180 |
"eval_steps_per_second": 0.197,
|
| 50181 |
"step": 6600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50182 |
}
|
| 50183 |
],
|
| 50184 |
"logging_steps": 1,
|
|
@@ -50198,7 +50958,7 @@
|
|
| 50198 |
"attributes": {}
|
| 50199 |
}
|
| 50200 |
},
|
| 50201 |
-
"total_flos": 2.
|
| 50202 |
"train_batch_size": 8,
|
| 50203 |
"trial_name": null,
|
| 50204 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.9885651051272594,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 6700,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 50179 |
"eval_samples_per_second": 5.723,
|
| 50180 |
"eval_steps_per_second": 0.197,
|
| 50181 |
"step": 6600
|
| 50182 |
+
},
|
| 50183 |
+
{
|
| 50184 |
+
"epoch": 0.9739579490962744,
|
| 50185 |
+
"grad_norm": 4.089635372161865,
|
| 50186 |
+
"learning_rate": 4.106580592637577e-08,
|
| 50187 |
+
"loss": 0.0861,
|
| 50188 |
+
"step": 6601
|
| 50189 |
+
},
|
| 50190 |
+
{
|
| 50191 |
+
"epoch": 0.9741054961268905,
|
| 50192 |
+
"grad_norm": 3.2651960849761963,
|
| 50193 |
+
"learning_rate": 4.060078990829719e-08,
|
| 50194 |
+
"loss": 0.0572,
|
| 50195 |
+
"step": 6602
|
| 50196 |
+
},
|
| 50197 |
+
{
|
| 50198 |
+
"epoch": 0.9742530431575065,
|
| 50199 |
+
"grad_norm": 1.9765956401824951,
|
| 50200 |
+
"learning_rate": 4.0138416393955545e-08,
|
| 50201 |
+
"loss": 0.0234,
|
| 50202 |
+
"step": 6603
|
| 50203 |
+
},
|
| 50204 |
+
{
|
| 50205 |
+
"epoch": 0.9744005901881224,
|
| 50206 |
+
"grad_norm": 3.121824264526367,
|
| 50207 |
+
"learning_rate": 3.967868550602827e-08,
|
| 50208 |
+
"loss": 0.0691,
|
| 50209 |
+
"step": 6604
|
| 50210 |
+
},
|
| 50211 |
+
{
|
| 50212 |
+
"epoch": 0.9745481372187385,
|
| 50213 |
+
"grad_norm": 2.891749620437622,
|
| 50214 |
+
"learning_rate": 3.922159736649889e-08,
|
| 50215 |
+
"loss": 0.0814,
|
| 50216 |
+
"step": 6605
|
| 50217 |
+
},
|
| 50218 |
+
{
|
| 50219 |
+
"epoch": 0.9746956842493545,
|
| 50220 |
+
"grad_norm": 3.361797332763672,
|
| 50221 |
+
"learning_rate": 3.8767152096641504e-08,
|
| 50222 |
+
"loss": 0.0982,
|
| 50223 |
+
"step": 6606
|
| 50224 |
+
},
|
| 50225 |
+
{
|
| 50226 |
+
"epoch": 0.9748432312799705,
|
| 50227 |
+
"grad_norm": 2.2206366062164307,
|
| 50228 |
+
"learning_rate": 3.831534981703522e-08,
|
| 50229 |
+
"loss": 0.0196,
|
| 50230 |
+
"step": 6607
|
| 50231 |
+
},
|
| 50232 |
+
{
|
| 50233 |
+
"epoch": 0.9749907783105864,
|
| 50234 |
+
"grad_norm": 1.349016785621643,
|
| 50235 |
+
"learning_rate": 3.7866190647554145e-08,
|
| 50236 |
+
"loss": 0.0523,
|
| 50237 |
+
"step": 6608
|
| 50238 |
+
},
|
| 50239 |
+
{
|
| 50240 |
+
"epoch": 0.9751383253412025,
|
| 50241 |
+
"grad_norm": 2.72078537940979,
|
| 50242 |
+
"learning_rate": 3.7419674707374064e-08,
|
| 50243 |
+
"loss": 0.089,
|
| 50244 |
+
"step": 6609
|
| 50245 |
+
},
|
| 50246 |
+
{
|
| 50247 |
+
"epoch": 0.9752858723718185,
|
| 50248 |
+
"grad_norm": 1.52629816532135,
|
| 50249 |
+
"learning_rate": 3.697580211496798e-08,
|
| 50250 |
+
"loss": 0.0607,
|
| 50251 |
+
"step": 6610
|
| 50252 |
+
},
|
| 50253 |
+
{
|
| 50254 |
+
"epoch": 0.9754334194024346,
|
| 50255 |
+
"grad_norm": 1.2776182889938354,
|
| 50256 |
+
"learning_rate": 3.6534572988106144e-08,
|
| 50257 |
+
"loss": 0.0283,
|
| 50258 |
+
"step": 6611
|
| 50259 |
+
},
|
| 50260 |
+
{
|
| 50261 |
+
"epoch": 0.9755809664330506,
|
| 50262 |
+
"grad_norm": 1.8487846851348877,
|
| 50263 |
+
"learning_rate": 3.6095987443860445e-08,
|
| 50264 |
+
"loss": 0.0396,
|
| 50265 |
+
"step": 6612
|
| 50266 |
+
},
|
| 50267 |
+
{
|
| 50268 |
+
"epoch": 0.9757285134636665,
|
| 50269 |
+
"grad_norm": 1.9192994832992554,
|
| 50270 |
+
"learning_rate": 3.5660045598597814e-08,
|
| 50271 |
+
"loss": 0.0598,
|
| 50272 |
+
"step": 6613
|
| 50273 |
+
},
|
| 50274 |
+
{
|
| 50275 |
+
"epoch": 0.9758760604942825,
|
| 50276 |
+
"grad_norm": 1.6062878370285034,
|
| 50277 |
+
"learning_rate": 3.522674756798794e-08,
|
| 50278 |
+
"loss": 0.0665,
|
| 50279 |
+
"step": 6614
|
| 50280 |
+
},
|
| 50281 |
+
{
|
| 50282 |
+
"epoch": 0.9760236075248986,
|
| 50283 |
+
"grad_norm": 1.8371132612228394,
|
| 50284 |
+
"learning_rate": 3.479609346699553e-08,
|
| 50285 |
+
"loss": 0.0509,
|
| 50286 |
+
"step": 6615
|
| 50287 |
+
},
|
| 50288 |
+
{
|
| 50289 |
+
"epoch": 0.9761711545555146,
|
| 50290 |
+
"grad_norm": 2.38232159614563,
|
| 50291 |
+
"learning_rate": 3.4368083409885844e-08,
|
| 50292 |
+
"loss": 0.0964,
|
| 50293 |
+
"step": 6616
|
| 50294 |
+
},
|
| 50295 |
+
{
|
| 50296 |
+
"epoch": 0.9763187015861305,
|
| 50297 |
+
"grad_norm": 2.736990451812744,
|
| 50298 |
+
"learning_rate": 3.394271751021916e-08,
|
| 50299 |
+
"loss": 0.141,
|
| 50300 |
+
"step": 6617
|
| 50301 |
+
},
|
| 50302 |
+
{
|
| 50303 |
+
"epoch": 0.9764662486167466,
|
| 50304 |
+
"grad_norm": 6.989748954772949,
|
| 50305 |
+
"learning_rate": 3.351999588085963e-08,
|
| 50306 |
+
"loss": 0.0612,
|
| 50307 |
+
"step": 6618
|
| 50308 |
+
},
|
| 50309 |
+
{
|
| 50310 |
+
"epoch": 0.9766137956473626,
|
| 50311 |
+
"grad_norm": 1.7669501304626465,
|
| 50312 |
+
"learning_rate": 3.309991863396644e-08,
|
| 50313 |
+
"loss": 0.0488,
|
| 50314 |
+
"step": 6619
|
| 50315 |
+
},
|
| 50316 |
+
{
|
| 50317 |
+
"epoch": 0.9767613426779787,
|
| 50318 |
+
"grad_norm": 1.4926352500915527,
|
| 50319 |
+
"learning_rate": 3.2682485880997096e-08,
|
| 50320 |
+
"loss": 0.0275,
|
| 50321 |
+
"step": 6620
|
| 50322 |
+
},
|
| 50323 |
+
{
|
| 50324 |
+
"epoch": 0.9767613426779787,
|
| 50325 |
+
"eval_accuracy": 0.9797395079594791,
|
| 50326 |
+
"eval_f1": 0.9653465346534653,
|
| 50327 |
+
"eval_loss": 0.055932920426130295,
|
| 50328 |
+
"eval_precision": 0.9848484848484849,
|
| 50329 |
+
"eval_recall": 0.9466019417475728,
|
| 50330 |
+
"eval_runtime": 49.2316,
|
| 50331 |
+
"eval_samples_per_second": 5.911,
|
| 50332 |
+
"eval_steps_per_second": 0.203,
|
| 50333 |
+
"step": 6620
|
| 50334 |
+
},
|
| 50335 |
+
{
|
| 50336 |
+
"epoch": 0.9769088897085946,
|
| 50337 |
+
"grad_norm": 3.745084047317505,
|
| 50338 |
+
"learning_rate": 3.226769773270855e-08,
|
| 50339 |
+
"loss": 0.0572,
|
| 50340 |
+
"step": 6621
|
| 50341 |
+
},
|
| 50342 |
+
{
|
| 50343 |
+
"epoch": 0.9770564367392106,
|
| 50344 |
+
"grad_norm": 1.6596072912216187,
|
| 50345 |
+
"learning_rate": 3.1855554299156096e-08,
|
| 50346 |
+
"loss": 0.0188,
|
| 50347 |
+
"step": 6622
|
| 50348 |
+
},
|
| 50349 |
+
{
|
| 50350 |
+
"epoch": 0.9772039837698266,
|
| 50351 |
+
"grad_norm": 2.8496854305267334,
|
| 50352 |
+
"learning_rate": 3.1446055689690056e-08,
|
| 50353 |
+
"loss": 0.0525,
|
| 50354 |
+
"step": 6623
|
| 50355 |
+
},
|
| 50356 |
+
{
|
| 50357 |
+
"epoch": 0.9773515308004427,
|
| 50358 |
+
"grad_norm": 2.801842451095581,
|
| 50359 |
+
"learning_rate": 3.103920201296462e-08,
|
| 50360 |
+
"loss": 0.0328,
|
| 50361 |
+
"step": 6624
|
| 50362 |
+
},
|
| 50363 |
+
{
|
| 50364 |
+
"epoch": 0.9774990778310586,
|
| 50365 |
+
"grad_norm": 2.546543598175049,
|
| 50366 |
+
"learning_rate": 3.063499337692788e-08,
|
| 50367 |
+
"loss": 0.0721,
|
| 50368 |
+
"step": 6625
|
| 50369 |
+
},
|
| 50370 |
+
{
|
| 50371 |
+
"epoch": 0.9776466248616746,
|
| 50372 |
+
"grad_norm": 2.072725534439087,
|
| 50373 |
+
"learning_rate": 3.023342988882849e-08,
|
| 50374 |
+
"loss": 0.037,
|
| 50375 |
+
"step": 6626
|
| 50376 |
+
},
|
| 50377 |
+
{
|
| 50378 |
+
"epoch": 0.9777941718922907,
|
| 50379 |
+
"grad_norm": 2.3779847621917725,
|
| 50380 |
+
"learning_rate": 2.983451165521123e-08,
|
| 50381 |
+
"loss": 0.0952,
|
| 50382 |
+
"step": 6627
|
| 50383 |
+
},
|
| 50384 |
+
{
|
| 50385 |
+
"epoch": 0.9779417189229067,
|
| 50386 |
+
"grad_norm": 2.976325035095215,
|
| 50387 |
+
"learning_rate": 2.9438238781921424e-08,
|
| 50388 |
+
"loss": 0.071,
|
| 50389 |
+
"step": 6628
|
| 50390 |
+
},
|
| 50391 |
+
{
|
| 50392 |
+
"epoch": 0.9780892659535226,
|
| 50393 |
+
"grad_norm": 3.863071918487549,
|
| 50394 |
+
"learning_rate": 2.9044611374099418e-08,
|
| 50395 |
+
"loss": 0.1468,
|
| 50396 |
+
"step": 6629
|
| 50397 |
+
},
|
| 50398 |
+
{
|
| 50399 |
+
"epoch": 0.9782368129841387,
|
| 50400 |
+
"grad_norm": 4.173577785491943,
|
| 50401 |
+
"learning_rate": 2.8653629536187222e-08,
|
| 50402 |
+
"loss": 0.0564,
|
| 50403 |
+
"step": 6630
|
| 50404 |
+
},
|
| 50405 |
+
{
|
| 50406 |
+
"epoch": 0.9783843600147547,
|
| 50407 |
+
"grad_norm": 3.290264844894409,
|
| 50408 |
+
"learning_rate": 2.8265293371922965e-08,
|
| 50409 |
+
"loss": 0.0889,
|
| 50410 |
+
"step": 6631
|
| 50411 |
+
},
|
| 50412 |
+
{
|
| 50413 |
+
"epoch": 0.9785319070453707,
|
| 50414 |
+
"grad_norm": 1.5761719942092896,
|
| 50415 |
+
"learning_rate": 2.7879602984342002e-08,
|
| 50416 |
+
"loss": 0.0203,
|
| 50417 |
+
"step": 6632
|
| 50418 |
+
},
|
| 50419 |
+
{
|
| 50420 |
+
"epoch": 0.9786794540759867,
|
| 50421 |
+
"grad_norm": 3.7511749267578125,
|
| 50422 |
+
"learning_rate": 2.7496558475778035e-08,
|
| 50423 |
+
"loss": 0.0736,
|
| 50424 |
+
"step": 6633
|
| 50425 |
+
},
|
| 50426 |
+
{
|
| 50427 |
+
"epoch": 0.9788270011066027,
|
| 50428 |
+
"grad_norm": 4.070005893707275,
|
| 50429 |
+
"learning_rate": 2.7116159947865318e-08,
|
| 50430 |
+
"loss": 0.0997,
|
| 50431 |
+
"step": 6634
|
| 50432 |
+
},
|
| 50433 |
+
{
|
| 50434 |
+
"epoch": 0.9789745481372187,
|
| 50435 |
+
"grad_norm": 2.2428393363952637,
|
| 50436 |
+
"learning_rate": 2.6738407501533113e-08,
|
| 50437 |
+
"loss": 0.064,
|
| 50438 |
+
"step": 6635
|
| 50439 |
+
},
|
| 50440 |
+
{
|
| 50441 |
+
"epoch": 0.9791220951678348,
|
| 50442 |
+
"grad_norm": 1.4023271799087524,
|
| 50443 |
+
"learning_rate": 2.636330123701014e-08,
|
| 50444 |
+
"loss": 0.0415,
|
| 50445 |
+
"step": 6636
|
| 50446 |
+
},
|
| 50447 |
+
{
|
| 50448 |
+
"epoch": 0.9792696421984508,
|
| 50449 |
+
"grad_norm": 1.616129755973816,
|
| 50450 |
+
"learning_rate": 2.599084125382123e-08,
|
| 50451 |
+
"loss": 0.0531,
|
| 50452 |
+
"step": 6637
|
| 50453 |
+
},
|
| 50454 |
+
{
|
| 50455 |
+
"epoch": 0.9794171892290667,
|
| 50456 |
+
"grad_norm": 4.515521049499512,
|
| 50457 |
+
"learning_rate": 2.5621027650790664e-08,
|
| 50458 |
+
"loss": 0.2382,
|
| 50459 |
+
"step": 6638
|
| 50460 |
+
},
|
| 50461 |
+
{
|
| 50462 |
+
"epoch": 0.9795647362596828,
|
| 50463 |
+
"grad_norm": 2.131122589111328,
|
| 50464 |
+
"learning_rate": 2.5253860526042173e-08,
|
| 50465 |
+
"loss": 0.0389,
|
| 50466 |
+
"step": 6639
|
| 50467 |
+
},
|
| 50468 |
+
{
|
| 50469 |
+
"epoch": 0.9797122832902988,
|
| 50470 |
+
"grad_norm": 1.6050862073898315,
|
| 50471 |
+
"learning_rate": 2.4889339976992277e-08,
|
| 50472 |
+
"loss": 0.0358,
|
| 50473 |
+
"step": 6640
|
| 50474 |
+
},
|
| 50475 |
+
{
|
| 50476 |
+
"epoch": 0.9797122832902988,
|
| 50477 |
+
"eval_accuracy": 0.9782923299565847,
|
| 50478 |
+
"eval_f1": 0.9629629629629629,
|
| 50479 |
+
"eval_loss": 0.05516430363059044,
|
| 50480 |
+
"eval_precision": 0.9798994974874372,
|
| 50481 |
+
"eval_recall": 0.9466019417475728,
|
| 50482 |
+
"eval_runtime": 49.5399,
|
| 50483 |
+
"eval_samples_per_second": 5.874,
|
| 50484 |
+
"eval_steps_per_second": 0.202,
|
| 50485 |
+
"step": 6640
|
| 50486 |
+
},
|
| 50487 |
+
{
|
| 50488 |
+
"epoch": 0.9798598303209148,
|
| 50489 |
+
"grad_norm": 1.8880443572998047,
|
| 50490 |
+
"learning_rate": 2.4527466100360277e-08,
|
| 50491 |
+
"loss": 0.0747,
|
| 50492 |
+
"step": 6641
|
| 50493 |
+
},
|
| 50494 |
+
{
|
| 50495 |
+
"epoch": 0.9800073773515308,
|
| 50496 |
+
"grad_norm": 5.500354290008545,
|
| 50497 |
+
"learning_rate": 2.4168238992160477e-08,
|
| 50498 |
+
"loss": 0.0474,
|
| 50499 |
+
"step": 6642
|
| 50500 |
+
},
|
| 50501 |
+
{
|
| 50502 |
+
"epoch": 0.9801549243821468,
|
| 50503 |
+
"grad_norm": 2.404766321182251,
|
| 50504 |
+
"learning_rate": 2.3811658747705525e-08,
|
| 50505 |
+
"loss": 0.0494,
|
| 50506 |
+
"step": 6643
|
| 50507 |
+
},
|
| 50508 |
+
{
|
| 50509 |
+
"epoch": 0.9803024714127628,
|
| 50510 |
+
"grad_norm": 2.824960947036743,
|
| 50511 |
+
"learning_rate": 2.3457725461607518e-08,
|
| 50512 |
+
"loss": 0.074,
|
| 50513 |
+
"step": 6644
|
| 50514 |
+
},
|
| 50515 |
+
{
|
| 50516 |
+
"epoch": 0.9804500184433789,
|
| 50517 |
+
"grad_norm": 1.472124457359314,
|
| 50518 |
+
"learning_rate": 2.3106439227773558e-08,
|
| 50519 |
+
"loss": 0.0277,
|
| 50520 |
+
"step": 6645
|
| 50521 |
+
},
|
| 50522 |
+
{
|
| 50523 |
+
"epoch": 0.9805975654739948,
|
| 50524 |
+
"grad_norm": 0.9315122365951538,
|
| 50525 |
+
"learning_rate": 2.27578001394102e-08,
|
| 50526 |
+
"loss": 0.0097,
|
| 50527 |
+
"step": 6646
|
| 50528 |
+
},
|
| 50529 |
+
{
|
| 50530 |
+
"epoch": 0.9807451125046108,
|
| 50531 |
+
"grad_norm": 2.713543176651001,
|
| 50532 |
+
"learning_rate": 2.241180828902012e-08,
|
| 50533 |
+
"loss": 0.0622,
|
| 50534 |
+
"step": 6647
|
| 50535 |
+
},
|
| 50536 |
+
{
|
| 50537 |
+
"epoch": 0.9808926595352269,
|
| 50538 |
+
"grad_norm": 5.194150447845459,
|
| 50539 |
+
"learning_rate": 2.2068463768405435e-08,
|
| 50540 |
+
"loss": 0.0851,
|
| 50541 |
+
"step": 6648
|
| 50542 |
+
},
|
| 50543 |
+
{
|
| 50544 |
+
"epoch": 0.9810402065658429,
|
| 50545 |
+
"grad_norm": 5.96819543838501,
|
| 50546 |
+
"learning_rate": 2.1727766668664385e-08,
|
| 50547 |
+
"loss": 0.0849,
|
| 50548 |
+
"step": 6649
|
| 50549 |
+
},
|
| 50550 |
+
{
|
| 50551 |
+
"epoch": 0.9811877535964588,
|
| 50552 |
+
"grad_norm": 1.1071208715438843,
|
| 50553 |
+
"learning_rate": 2.138971708019355e-08,
|
| 50554 |
+
"loss": 0.0268,
|
| 50555 |
+
"step": 6650
|
| 50556 |
+
},
|
| 50557 |
+
{
|
| 50558 |
+
"epoch": 0.9813353006270749,
|
| 50559 |
+
"grad_norm": 2.806211471557617,
|
| 50560 |
+
"learning_rate": 2.105431509268563e-08,
|
| 50561 |
+
"loss": 0.0916,
|
| 50562 |
+
"step": 6651
|
| 50563 |
+
},
|
| 50564 |
+
{
|
| 50565 |
+
"epoch": 0.9814828476576909,
|
| 50566 |
+
"grad_norm": 3.1690165996551514,
|
| 50567 |
+
"learning_rate": 2.0721560795133876e-08,
|
| 50568 |
+
"loss": 0.0993,
|
| 50569 |
+
"step": 6652
|
| 50570 |
+
},
|
| 50571 |
+
{
|
| 50572 |
+
"epoch": 0.9816303946883069,
|
| 50573 |
+
"grad_norm": 2.2227795124053955,
|
| 50574 |
+
"learning_rate": 2.0391454275827673e-08,
|
| 50575 |
+
"loss": 0.0388,
|
| 50576 |
+
"step": 6653
|
| 50577 |
+
},
|
| 50578 |
+
{
|
| 50579 |
+
"epoch": 0.9817779417189229,
|
| 50580 |
+
"grad_norm": 0.5616309642791748,
|
| 50581 |
+
"learning_rate": 2.0063995622350287e-08,
|
| 50582 |
+
"loss": 0.0045,
|
| 50583 |
+
"step": 6654
|
| 50584 |
+
},
|
| 50585 |
+
{
|
| 50586 |
+
"epoch": 0.9819254887495389,
|
| 50587 |
+
"grad_norm": 2.450514316558838,
|
| 50588 |
+
"learning_rate": 1.9739184921588885e-08,
|
| 50589 |
+
"loss": 0.0688,
|
| 50590 |
+
"step": 6655
|
| 50591 |
+
},
|
| 50592 |
+
{
|
| 50593 |
+
"epoch": 0.9820730357801549,
|
| 50594 |
+
"grad_norm": 2.0356853008270264,
|
| 50595 |
+
"learning_rate": 1.9417022259723418e-08,
|
| 50596 |
+
"loss": 0.0511,
|
| 50597 |
+
"step": 6656
|
| 50598 |
+
},
|
| 50599 |
+
{
|
| 50600 |
+
"epoch": 0.982220582810771,
|
| 50601 |
+
"grad_norm": 2.293266773223877,
|
| 50602 |
+
"learning_rate": 1.9097507722231068e-08,
|
| 50603 |
+
"loss": 0.0289,
|
| 50604 |
+
"step": 6657
|
| 50605 |
+
},
|
| 50606 |
+
{
|
| 50607 |
+
"epoch": 0.982368129841387,
|
| 50608 |
+
"grad_norm": 2.306947708129883,
|
| 50609 |
+
"learning_rate": 1.8780641393890685e-08,
|
| 50610 |
+
"loss": 0.0559,
|
| 50611 |
+
"step": 6658
|
| 50612 |
+
},
|
| 50613 |
+
{
|
| 50614 |
+
"epoch": 0.9825156768720029,
|
| 50615 |
+
"grad_norm": 0.8441616296768188,
|
| 50616 |
+
"learning_rate": 1.84664233587728e-08,
|
| 50617 |
+
"loss": 0.0107,
|
| 50618 |
+
"step": 6659
|
| 50619 |
+
},
|
| 50620 |
+
{
|
| 50621 |
+
"epoch": 0.982663223902619,
|
| 50622 |
+
"grad_norm": 1.6219745874404907,
|
| 50623 |
+
"learning_rate": 1.815485370025072e-08,
|
| 50624 |
+
"loss": 0.0266,
|
| 50625 |
+
"step": 6660
|
| 50626 |
+
},
|
| 50627 |
+
{
|
| 50628 |
+
"epoch": 0.982663223902619,
|
| 50629 |
+
"eval_accuracy": 0.9782923299565847,
|
| 50630 |
+
"eval_f1": 0.9629629629629629,
|
| 50631 |
+
"eval_loss": 0.05498597025871277,
|
| 50632 |
+
"eval_precision": 0.9798994974874372,
|
| 50633 |
+
"eval_recall": 0.9466019417475728,
|
| 50634 |
+
"eval_runtime": 49.5621,
|
| 50635 |
+
"eval_samples_per_second": 5.871,
|
| 50636 |
+
"eval_steps_per_second": 0.202,
|
| 50637 |
+
"step": 6660
|
| 50638 |
+
},
|
| 50639 |
+
{
|
| 50640 |
+
"epoch": 0.982810770933235,
|
| 50641 |
+
"grad_norm": 1.976530909538269,
|
| 50642 |
+
"learning_rate": 1.784593250099054e-08,
|
| 50643 |
+
"loss": 0.0253,
|
| 50644 |
+
"step": 6661
|
| 50645 |
+
},
|
| 50646 |
+
{
|
| 50647 |
+
"epoch": 0.982958317963851,
|
| 50648 |
+
"grad_norm": 2.217996120452881,
|
| 50649 |
+
"learning_rate": 1.7539659842957803e-08,
|
| 50650 |
+
"loss": 0.0555,
|
| 50651 |
+
"step": 6662
|
| 50652 |
+
},
|
| 50653 |
+
{
|
| 50654 |
+
"epoch": 0.983105864994467,
|
| 50655 |
+
"grad_norm": 2.010887861251831,
|
| 50656 |
+
"learning_rate": 1.7236035807416397e-08,
|
| 50657 |
+
"loss": 0.0421,
|
| 50658 |
+
"step": 6663
|
| 50659 |
+
},
|
| 50660 |
+
{
|
| 50661 |
+
"epoch": 0.983253412025083,
|
| 50662 |
+
"grad_norm": 0.6405054926872253,
|
| 50663 |
+
"learning_rate": 1.6935060474926323e-08,
|
| 50664 |
+
"loss": 0.0071,
|
| 50665 |
+
"step": 6664
|
| 50666 |
+
},
|
| 50667 |
+
{
|
| 50668 |
+
"epoch": 0.983400959055699,
|
| 50669 |
+
"grad_norm": 2.444506883621216,
|
| 50670 |
+
"learning_rate": 1.6636733925342595e-08,
|
| 50671 |
+
"loss": 0.033,
|
| 50672 |
+
"step": 6665
|
| 50673 |
+
},
|
| 50674 |
+
{
|
| 50675 |
+
"epoch": 0.983548506086315,
|
| 50676 |
+
"grad_norm": 1.0735312700271606,
|
| 50677 |
+
"learning_rate": 1.6341056237820784e-08,
|
| 50678 |
+
"loss": 0.0151,
|
| 50679 |
+
"step": 6666
|
| 50680 |
+
},
|
| 50681 |
+
{
|
| 50682 |
+
"epoch": 0.983696053116931,
|
| 50683 |
+
"grad_norm": 2.435049533843994,
|
| 50684 |
+
"learning_rate": 1.6048027490812577e-08,
|
| 50685 |
+
"loss": 0.0543,
|
| 50686 |
+
"step": 6667
|
| 50687 |
+
},
|
| 50688 |
+
{
|
| 50689 |
+
"epoch": 0.983843600147547,
|
| 50690 |
+
"grad_norm": 2.4513931274414062,
|
| 50691 |
+
"learning_rate": 1.5757647762065786e-08,
|
| 50692 |
+
"loss": 0.0621,
|
| 50693 |
+
"step": 6668
|
| 50694 |
+
},
|
| 50695 |
+
{
|
| 50696 |
+
"epoch": 0.983991147178163,
|
| 50697 |
+
"grad_norm": 1.8004716634750366,
|
| 50698 |
+
"learning_rate": 1.5469917128626554e-08,
|
| 50699 |
+
"loss": 0.025,
|
| 50700 |
+
"step": 6669
|
| 50701 |
+
},
|
| 50702 |
+
{
|
| 50703 |
+
"epoch": 0.9841386942087791,
|
| 50704 |
+
"grad_norm": 1.50918710231781,
|
| 50705 |
+
"learning_rate": 1.518483566683826e-08,
|
| 50706 |
+
"loss": 0.0401,
|
| 50707 |
+
"step": 6670
|
| 50708 |
+
},
|
| 50709 |
+
{
|
| 50710 |
+
"epoch": 0.984286241239395,
|
| 50711 |
+
"grad_norm": 2.1539971828460693,
|
| 50712 |
+
"learning_rate": 1.4902403452339287e-08,
|
| 50713 |
+
"loss": 0.0664,
|
| 50714 |
+
"step": 6671
|
| 50715 |
+
},
|
| 50716 |
+
{
|
| 50717 |
+
"epoch": 0.984433788270011,
|
| 50718 |
+
"grad_norm": 7.559150218963623,
|
| 50719 |
+
"learning_rate": 1.4622620560069688e-08,
|
| 50720 |
+
"loss": 0.0937,
|
| 50721 |
+
"step": 6672
|
| 50722 |
+
},
|
| 50723 |
+
{
|
| 50724 |
+
"epoch": 0.9845813353006271,
|
| 50725 |
+
"grad_norm": 1.530104637145996,
|
| 50726 |
+
"learning_rate": 1.4345487064260089e-08,
|
| 50727 |
+
"loss": 0.0648,
|
| 50728 |
+
"step": 6673
|
| 50729 |
+
},
|
| 50730 |
+
{
|
| 50731 |
+
"epoch": 0.9847288823312431,
|
| 50732 |
+
"grad_norm": 1.3213176727294922,
|
| 50733 |
+
"learning_rate": 1.4071003038443887e-08,
|
| 50734 |
+
"loss": 0.0344,
|
| 50735 |
+
"step": 6674
|
| 50736 |
+
},
|
| 50737 |
+
{
|
| 50738 |
+
"epoch": 0.984876429361859,
|
| 50739 |
+
"grad_norm": 1.8271011114120483,
|
| 50740 |
+
"learning_rate": 1.3799168555449494e-08,
|
| 50741 |
+
"loss": 0.0243,
|
| 50742 |
+
"step": 6675
|
| 50743 |
+
},
|
| 50744 |
+
{
|
| 50745 |
+
"epoch": 0.9850239763924751,
|
| 50746 |
+
"grad_norm": 1.226176142692566,
|
| 50747 |
+
"learning_rate": 1.3529983687400328e-08,
|
| 50748 |
+
"loss": 0.0178,
|
| 50749 |
+
"step": 6676
|
| 50750 |
+
},
|
| 50751 |
+
{
|
| 50752 |
+
"epoch": 0.9851715234230911,
|
| 50753 |
+
"grad_norm": 0.6308827996253967,
|
| 50754 |
+
"learning_rate": 1.3263448505720366e-08,
|
| 50755 |
+
"loss": 0.007,
|
| 50756 |
+
"step": 6677
|
| 50757 |
+
},
|
| 50758 |
+
{
|
| 50759 |
+
"epoch": 0.9853190704537071,
|
| 50760 |
+
"grad_norm": 2.996870517730713,
|
| 50761 |
+
"learning_rate": 1.2999563081127486e-08,
|
| 50762 |
+
"loss": 0.0786,
|
| 50763 |
+
"step": 6678
|
| 50764 |
+
},
|
| 50765 |
+
{
|
| 50766 |
+
"epoch": 0.9854666174843232,
|
| 50767 |
+
"grad_norm": 2.7150681018829346,
|
| 50768 |
+
"learning_rate": 1.2738327483639013e-08,
|
| 50769 |
+
"loss": 0.0394,
|
| 50770 |
+
"step": 6679
|
| 50771 |
+
},
|
| 50772 |
+
{
|
| 50773 |
+
"epoch": 0.9856141645149391,
|
| 50774 |
+
"grad_norm": 2.043134927749634,
|
| 50775 |
+
"learning_rate": 1.2479741782566168e-08,
|
| 50776 |
+
"loss": 0.0759,
|
| 50777 |
+
"step": 6680
|
| 50778 |
+
},
|
| 50779 |
+
{
|
| 50780 |
+
"epoch": 0.9856141645149391,
|
| 50781 |
+
"eval_accuracy": 0.9782923299565847,
|
| 50782 |
+
"eval_f1": 0.9629629629629629,
|
| 50783 |
+
"eval_loss": 0.05593600869178772,
|
| 50784 |
+
"eval_precision": 0.9798994974874372,
|
| 50785 |
+
"eval_recall": 0.9466019417475728,
|
| 50786 |
+
"eval_runtime": 49.7146,
|
| 50787 |
+
"eval_samples_per_second": 5.853,
|
| 50788 |
+
"eval_steps_per_second": 0.201,
|
| 50789 |
+
"step": 6680
|
| 50790 |
+
},
|
| 50791 |
+
{
|
| 50792 |
+
"epoch": 0.9857617115455551,
|
| 50793 |
+
"grad_norm": 1.8694920539855957,
|
| 50794 |
+
"learning_rate": 1.2223806046520737e-08,
|
| 50795 |
+
"loss": 0.0362,
|
| 50796 |
+
"step": 6681
|
| 50797 |
+
},
|
| 50798 |
+
{
|
| 50799 |
+
"epoch": 0.9859092585761712,
|
| 50800 |
+
"grad_norm": 2.6727139949798584,
|
| 50801 |
+
"learning_rate": 1.1970520343408398e-08,
|
| 50802 |
+
"loss": 0.073,
|
| 50803 |
+
"step": 6682
|
| 50804 |
+
},
|
| 50805 |
+
{
|
| 50806 |
+
"epoch": 0.9860568056067872,
|
| 50807 |
+
"grad_norm": 0.7778927683830261,
|
| 50808 |
+
"learning_rate": 1.1719884740433174e-08,
|
| 50809 |
+
"loss": 0.0056,
|
| 50810 |
+
"step": 6683
|
| 50811 |
+
},
|
| 50812 |
+
{
|
| 50813 |
+
"epoch": 0.9862043526374031,
|
| 50814 |
+
"grad_norm": 2.3464653491973877,
|
| 50815 |
+
"learning_rate": 1.1471899304095202e-08,
|
| 50816 |
+
"loss": 0.0314,
|
| 50817 |
+
"step": 6684
|
| 50818 |
+
},
|
| 50819 |
+
{
|
| 50820 |
+
"epoch": 0.9863518996680192,
|
| 50821 |
+
"grad_norm": 0.8709948658943176,
|
| 50822 |
+
"learning_rate": 1.122656410019296e-08,
|
| 50823 |
+
"loss": 0.0199,
|
| 50824 |
+
"step": 6685
|
| 50825 |
+
},
|
| 50826 |
+
{
|
| 50827 |
+
"epoch": 0.9864994466986352,
|
| 50828 |
+
"grad_norm": 6.606779098510742,
|
| 50829 |
+
"learning_rate": 1.0983879193819936e-08,
|
| 50830 |
+
"loss": 0.108,
|
| 50831 |
+
"step": 6686
|
| 50832 |
+
},
|
| 50833 |
+
{
|
| 50834 |
+
"epoch": 0.9866469937292512,
|
| 50835 |
+
"grad_norm": 4.287250995635986,
|
| 50836 |
+
"learning_rate": 1.074384464936684e-08,
|
| 50837 |
+
"loss": 0.0716,
|
| 50838 |
+
"step": 6687
|
| 50839 |
+
},
|
| 50840 |
+
{
|
| 50841 |
+
"epoch": 0.9867945407598672,
|
| 50842 |
+
"grad_norm": 0.7073714733123779,
|
| 50843 |
+
"learning_rate": 1.0506460530521622e-08,
|
| 50844 |
+
"loss": 0.0188,
|
| 50845 |
+
"step": 6688
|
| 50846 |
+
},
|
| 50847 |
+
{
|
| 50848 |
+
"epoch": 0.9869420877904832,
|
| 50849 |
+
"grad_norm": 4.2220563888549805,
|
| 50850 |
+
"learning_rate": 1.0271726900269452e-08,
|
| 50851 |
+
"loss": 0.0769,
|
| 50852 |
+
"step": 6689
|
| 50853 |
+
},
|
| 50854 |
+
{
|
| 50855 |
+
"epoch": 0.9870896348210992,
|
| 50856 |
+
"grad_norm": 1.6127564907073975,
|
| 50857 |
+
"learning_rate": 1.003964382089162e-08,
|
| 50858 |
+
"loss": 0.0457,
|
| 50859 |
+
"step": 6690
|
| 50860 |
+
},
|
| 50861 |
+
{
|
| 50862 |
+
"epoch": 0.9872371818517153,
|
| 50863 |
+
"grad_norm": 2.2320802211761475,
|
| 50864 |
+
"learning_rate": 9.810211353965537e-09,
|
| 50865 |
+
"loss": 0.047,
|
| 50866 |
+
"step": 6691
|
| 50867 |
+
},
|
| 50868 |
+
{
|
| 50869 |
+
"epoch": 0.9873847288823312,
|
| 50870 |
+
"grad_norm": 3.913719654083252,
|
| 50871 |
+
"learning_rate": 9.583429560365843e-09,
|
| 50872 |
+
"loss": 0.0715,
|
| 50873 |
+
"step": 6692
|
| 50874 |
+
},
|
| 50875 |
+
{
|
| 50876 |
+
"epoch": 0.9875322759129472,
|
| 50877 |
+
"grad_norm": 2.9218332767486572,
|
| 50878 |
+
"learning_rate": 9.359298500264402e-09,
|
| 50879 |
+
"loss": 0.0513,
|
| 50880 |
+
"step": 6693
|
| 50881 |
+
},
|
| 50882 |
+
{
|
| 50883 |
+
"epoch": 0.9876798229435633,
|
| 50884 |
+
"grad_norm": 1.7875134944915771,
|
| 50885 |
+
"learning_rate": 9.137818233129203e-09,
|
| 50886 |
+
"loss": 0.0406,
|
| 50887 |
+
"step": 6694
|
| 50888 |
+
},
|
| 50889 |
+
{
|
| 50890 |
+
"epoch": 0.9878273699741793,
|
| 50891 |
+
"grad_norm": 2.7455263137817383,
|
| 50892 |
+
"learning_rate": 8.91898881772657e-09,
|
| 50893 |
+
"loss": 0.0704,
|
| 50894 |
+
"step": 6695
|
| 50895 |
+
},
|
| 50896 |
+
{
|
| 50897 |
+
"epoch": 0.9879749170047952,
|
| 50898 |
+
"grad_norm": 0.6625596880912781,
|
| 50899 |
+
"learning_rate": 8.702810312115618e-09,
|
| 50900 |
+
"loss": 0.006,
|
| 50901 |
+
"step": 6696
|
| 50902 |
+
},
|
| 50903 |
+
{
|
| 50904 |
+
"epoch": 0.9881224640354113,
|
| 50905 |
+
"grad_norm": 1.6851662397384644,
|
| 50906 |
+
"learning_rate": 8.489282773656016e-09,
|
| 50907 |
+
"loss": 0.0527,
|
| 50908 |
+
"step": 6697
|
| 50909 |
+
},
|
| 50910 |
+
{
|
| 50911 |
+
"epoch": 0.9882700110660273,
|
| 50912 |
+
"grad_norm": 2.4347875118255615,
|
| 50913 |
+
"learning_rate": 8.278406259001337e-09,
|
| 50914 |
+
"loss": 0.0673,
|
| 50915 |
+
"step": 6698
|
| 50916 |
+
},
|
| 50917 |
+
{
|
| 50918 |
+
"epoch": 0.9884175580966433,
|
| 50919 |
+
"grad_norm": 5.950766563415527,
|
| 50920 |
+
"learning_rate": 8.07018082410349e-09,
|
| 50921 |
+
"loss": 0.091,
|
| 50922 |
+
"step": 6699
|
| 50923 |
+
},
|
| 50924 |
+
{
|
| 50925 |
+
"epoch": 0.9885651051272594,
|
| 50926 |
+
"grad_norm": 1.7334251403808594,
|
| 50927 |
+
"learning_rate": 7.864606524211616e-09,
|
| 50928 |
+
"loss": 0.0396,
|
| 50929 |
+
"step": 6700
|
| 50930 |
+
},
|
| 50931 |
+
{
|
| 50932 |
+
"epoch": 0.9885651051272594,
|
| 50933 |
+
"eval_accuracy": 0.9797395079594791,
|
| 50934 |
+
"eval_f1": 0.9653465346534653,
|
| 50935 |
+
"eval_loss": 0.05492059141397476,
|
| 50936 |
+
"eval_precision": 0.9848484848484849,
|
| 50937 |
+
"eval_recall": 0.9466019417475728,
|
| 50938 |
+
"eval_runtime": 49.3051,
|
| 50939 |
+
"eval_samples_per_second": 5.902,
|
| 50940 |
+
"eval_steps_per_second": 0.203,
|
| 50941 |
+
"step": 6700
|
| 50942 |
}
|
| 50943 |
],
|
| 50944 |
"logging_steps": 1,
|
|
|
|
| 50958 |
"attributes": {}
|
| 50959 |
}
|
| 50960 |
},
|
| 50961 |
+
"total_flos": 2.0639708098351596e+18,
|
| 50962 |
"train_batch_size": 8,
|
| 50963 |
"trial_name": null,
|
| 50964 |
"trial_params": null
|