Training in progress, step 1300, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13934748
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b360a4328f640ed51ddaf65beb21759c2322654758d2b7b7f6e00f66a17354f8
|
| 3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13999412
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20d1395c5a780e12bd9c2d3c0a3a98e6d11c049377ae734be8b4c6bec63af7cd
|
| 3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b980d02c86a12c4ddd321afa25558b9bda6ce7377f5a7301fbc73043dd7e72fd
|
| 3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4716794fe32a12753a15aca9b69a92b8ff2a13cc9a1449ccd27487d4a1ca9a7d
|
| 3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c424477fe3f3aa933900f713ea30de6e63503f0eb3c14d4b5a3fd7be751453c
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:992a85fd0f9141e2a7ce8e4ce2c770b6564f0c5de13f4c613cc4d93bc456ab03
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0f386445b7a0ecca12a354673d12666bd045fe42bc66c5282186ece7173d4fd
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81c7ec7bbec3615990bf78e011b0f7bc719d60680964d34bbac0633971dd9f36
|
| 3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce0c8f8d9638136cb5308b0b5847756c4993f316ede670798b5676d4508282ce
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc378caf9e3227b70a474c0063f96ad82cc21701d0d5fa1f12d57ba19770909f
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41e92489ba1b6fe609dc774dd68b88282000969f034d53fc7540c25e859de003
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:037f3e8e143701c6dab9d7f5db31ada1d1f6e223405cca2ab7ccd4b03d64aac8
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:230ef6b51382a71e81c933c6e0f89f49737687e37bb89c538f18f98f56a78ee9
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -9139,6 +9139,766 @@
|
|
| 9139 |
"eval_samples_per_second": 6.853,
|
| 9140 |
"eval_steps_per_second": 0.228,
|
| 9141 |
"step": 1200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9142 |
}
|
| 9143 |
],
|
| 9144 |
"logging_steps": 1,
|
|
@@ -9158,7 +9918,7 @@
|
|
| 9158 |
"attributes": {}
|
| 9159 |
}
|
| 9160 |
},
|
| 9161 |
-
"total_flos": 1.
|
| 9162 |
"train_batch_size": 8,
|
| 9163 |
"trial_name": null,
|
| 9164 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.9950248756218906,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 1300,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 9139 |
"eval_samples_per_second": 6.853,
|
| 9140 |
"eval_steps_per_second": 0.228,
|
| 9141 |
"step": 1200
|
| 9142 |
+
},
|
| 9143 |
+
{
|
| 9144 |
+
"epoch": 0.9192499043245312,
|
| 9145 |
+
"grad_norm": 4.442579746246338,
|
| 9146 |
+
"learning_rate": 3.914879239610392e-07,
|
| 9147 |
+
"loss": 0.186,
|
| 9148 |
+
"step": 1201
|
| 9149 |
+
},
|
| 9150 |
+
{
|
| 9151 |
+
"epoch": 0.9200153080750095,
|
| 9152 |
+
"grad_norm": 5.45106315612793,
|
| 9153 |
+
"learning_rate": 3.8411440400117685e-07,
|
| 9154 |
+
"loss": 0.1837,
|
| 9155 |
+
"step": 1202
|
| 9156 |
+
},
|
| 9157 |
+
{
|
| 9158 |
+
"epoch": 0.9207807118254879,
|
| 9159 |
+
"grad_norm": 4.747509479522705,
|
| 9160 |
+
"learning_rate": 3.768096245974129e-07,
|
| 9161 |
+
"loss": 0.2562,
|
| 9162 |
+
"step": 1203
|
| 9163 |
+
},
|
| 9164 |
+
{
|
| 9165 |
+
"epoch": 0.9215461155759663,
|
| 9166 |
+
"grad_norm": 6.138671398162842,
|
| 9167 |
+
"learning_rate": 3.69573637969024e-07,
|
| 9168 |
+
"loss": 0.3244,
|
| 9169 |
+
"step": 1204
|
| 9170 |
+
},
|
| 9171 |
+
{
|
| 9172 |
+
"epoch": 0.9223115193264447,
|
| 9173 |
+
"grad_norm": 7.972070217132568,
|
| 9174 |
+
"learning_rate": 3.6240649584351137e-07,
|
| 9175 |
+
"loss": 0.4027,
|
| 9176 |
+
"step": 1205
|
| 9177 |
+
},
|
| 9178 |
+
{
|
| 9179 |
+
"epoch": 0.9230769230769231,
|
| 9180 |
+
"grad_norm": 8.4572172164917,
|
| 9181 |
+
"learning_rate": 3.553082494562354e-07,
|
| 9182 |
+
"loss": 0.4941,
|
| 9183 |
+
"step": 1206
|
| 9184 |
+
},
|
| 9185 |
+
{
|
| 9186 |
+
"epoch": 0.9238423268274014,
|
| 9187 |
+
"grad_norm": 9.352378845214844,
|
| 9188 |
+
"learning_rate": 3.4827894955003825e-07,
|
| 9189 |
+
"loss": 0.448,
|
| 9190 |
+
"step": 1207
|
| 9191 |
+
},
|
| 9192 |
+
{
|
| 9193 |
+
"epoch": 0.9246077305778798,
|
| 9194 |
+
"grad_norm": 7.637875556945801,
|
| 9195 |
+
"learning_rate": 3.413186463748941e-07,
|
| 9196 |
+
"loss": 0.2718,
|
| 9197 |
+
"step": 1208
|
| 9198 |
+
},
|
| 9199 |
+
{
|
| 9200 |
+
"epoch": 0.9253731343283582,
|
| 9201 |
+
"grad_norm": 8.596519470214844,
|
| 9202 |
+
"learning_rate": 3.3442738968754164e-07,
|
| 9203 |
+
"loss": 0.2043,
|
| 9204 |
+
"step": 1209
|
| 9205 |
+
},
|
| 9206 |
+
{
|
| 9207 |
+
"epoch": 0.9261385380788366,
|
| 9208 |
+
"grad_norm": 11.038840293884277,
|
| 9209 |
+
"learning_rate": 3.276052287511333e-07,
|
| 9210 |
+
"loss": 0.2731,
|
| 9211 |
+
"step": 1210
|
| 9212 |
+
},
|
| 9213 |
+
{
|
| 9214 |
+
"epoch": 0.926903941829315,
|
| 9215 |
+
"grad_norm": 6.149134635925293,
|
| 9216 |
+
"learning_rate": 3.2085221233487564e-07,
|
| 9217 |
+
"loss": 0.3046,
|
| 9218 |
+
"step": 1211
|
| 9219 |
+
},
|
| 9220 |
+
{
|
| 9221 |
+
"epoch": 0.9276693455797933,
|
| 9222 |
+
"grad_norm": 5.461088180541992,
|
| 9223 |
+
"learning_rate": 3.1416838871368925e-07,
|
| 9224 |
+
"loss": 0.2553,
|
| 9225 |
+
"step": 1212
|
| 9226 |
+
},
|
| 9227 |
+
{
|
| 9228 |
+
"epoch": 0.9284347493302717,
|
| 9229 |
+
"grad_norm": 8.15916919708252,
|
| 9230 |
+
"learning_rate": 3.0755380566785955e-07,
|
| 9231 |
+
"loss": 0.2793,
|
| 9232 |
+
"step": 1213
|
| 9233 |
+
},
|
| 9234 |
+
{
|
| 9235 |
+
"epoch": 0.9292001530807501,
|
| 9236 |
+
"grad_norm": 6.028532028198242,
|
| 9237 |
+
"learning_rate": 3.010085104826932e-07,
|
| 9238 |
+
"loss": 0.2108,
|
| 9239 |
+
"step": 1214
|
| 9240 |
+
},
|
| 9241 |
+
{
|
| 9242 |
+
"epoch": 0.9299655568312285,
|
| 9243 |
+
"grad_norm": 9.626595497131348,
|
| 9244 |
+
"learning_rate": 2.945325499481855e-07,
|
| 9245 |
+
"loss": 0.2889,
|
| 9246 |
+
"step": 1215
|
| 9247 |
+
},
|
| 9248 |
+
{
|
| 9249 |
+
"epoch": 0.9307309605817069,
|
| 9250 |
+
"grad_norm": 8.43061637878418,
|
| 9251 |
+
"learning_rate": 2.881259703586814e-07,
|
| 9252 |
+
"loss": 0.3819,
|
| 9253 |
+
"step": 1216
|
| 9254 |
+
},
|
| 9255 |
+
{
|
| 9256 |
+
"epoch": 0.9314963643321852,
|
| 9257 |
+
"grad_norm": 9.330650329589844,
|
| 9258 |
+
"learning_rate": 2.817888175125472e-07,
|
| 9259 |
+
"loss": 0.2979,
|
| 9260 |
+
"step": 1217
|
| 9261 |
+
},
|
| 9262 |
+
{
|
| 9263 |
+
"epoch": 0.9322617680826636,
|
| 9264 |
+
"grad_norm": 6.501589775085449,
|
| 9265 |
+
"learning_rate": 2.7552113671184264e-07,
|
| 9266 |
+
"loss": 0.293,
|
| 9267 |
+
"step": 1218
|
| 9268 |
+
},
|
| 9269 |
+
{
|
| 9270 |
+
"epoch": 0.933027171833142,
|
| 9271 |
+
"grad_norm": 6.367552757263184,
|
| 9272 |
+
"learning_rate": 2.693229727619906e-07,
|
| 9273 |
+
"loss": 0.3728,
|
| 9274 |
+
"step": 1219
|
| 9275 |
+
},
|
| 9276 |
+
{
|
| 9277 |
+
"epoch": 0.9337925755836204,
|
| 9278 |
+
"grad_norm": 6.511219501495361,
|
| 9279 |
+
"learning_rate": 2.631943699714712e-07,
|
| 9280 |
+
"loss": 0.2681,
|
| 9281 |
+
"step": 1220
|
| 9282 |
+
},
|
| 9283 |
+
{
|
| 9284 |
+
"epoch": 0.9337925755836204,
|
| 9285 |
+
"eval_accuracy": 0.8898916967509025,
|
| 9286 |
+
"eval_f1": 0.8390501319261213,
|
| 9287 |
+
"eval_loss": 0.2956056296825409,
|
| 9288 |
+
"eval_precision": 0.8932584269662921,
|
| 9289 |
+
"eval_recall": 0.7910447761194029,
|
| 9290 |
+
"eval_runtime": 43.3109,
|
| 9291 |
+
"eval_samples_per_second": 6.95,
|
| 9292 |
+
"eval_steps_per_second": 0.231,
|
| 9293 |
+
"step": 1220
|
| 9294 |
+
},
|
| 9295 |
+
{
|
| 9296 |
+
"epoch": 0.9345579793340988,
|
| 9297 |
+
"grad_norm": 5.723000526428223,
|
| 9298 |
+
"learning_rate": 2.571353721514913e-07,
|
| 9299 |
+
"loss": 0.2749,
|
| 9300 |
+
"step": 1221
|
| 9301 |
+
},
|
| 9302 |
+
{
|
| 9303 |
+
"epoch": 0.9353233830845771,
|
| 9304 |
+
"grad_norm": 8.66303825378418,
|
| 9305 |
+
"learning_rate": 2.51146022615677e-07,
|
| 9306 |
+
"loss": 0.2631,
|
| 9307 |
+
"step": 1222
|
| 9308 |
+
},
|
| 9309 |
+
{
|
| 9310 |
+
"epoch": 0.9360887868350555,
|
| 9311 |
+
"grad_norm": 6.536643981933594,
|
| 9312 |
+
"learning_rate": 2.452263641797659e-07,
|
| 9313 |
+
"loss": 0.2504,
|
| 9314 |
+
"step": 1223
|
| 9315 |
+
},
|
| 9316 |
+
{
|
| 9317 |
+
"epoch": 0.9368541905855339,
|
| 9318 |
+
"grad_norm": 5.747756481170654,
|
| 9319 |
+
"learning_rate": 2.3937643916129404e-07,
|
| 9320 |
+
"loss": 0.2857,
|
| 9321 |
+
"step": 1224
|
| 9322 |
+
},
|
| 9323 |
+
{
|
| 9324 |
+
"epoch": 0.9376195943360123,
|
| 9325 |
+
"grad_norm": 13.398006439208984,
|
| 9326 |
+
"learning_rate": 2.3359628937930422e-07,
|
| 9327 |
+
"loss": 0.4189,
|
| 9328 |
+
"step": 1225
|
| 9329 |
+
},
|
| 9330 |
+
{
|
| 9331 |
+
"epoch": 0.9383849980864907,
|
| 9332 |
+
"grad_norm": 5.998396396636963,
|
| 9333 |
+
"learning_rate": 2.2788595615403475e-07,
|
| 9334 |
+
"loss": 0.3231,
|
| 9335 |
+
"step": 1226
|
| 9336 |
+
},
|
| 9337 |
+
{
|
| 9338 |
+
"epoch": 0.939150401836969,
|
| 9339 |
+
"grad_norm": 6.068146705627441,
|
| 9340 |
+
"learning_rate": 2.222454803066332e-07,
|
| 9341 |
+
"loss": 0.3236,
|
| 9342 |
+
"step": 1227
|
| 9343 |
+
},
|
| 9344 |
+
{
|
| 9345 |
+
"epoch": 0.9399158055874474,
|
| 9346 |
+
"grad_norm": 5.644654750823975,
|
| 9347 |
+
"learning_rate": 2.16674902158861e-07,
|
| 9348 |
+
"loss": 0.3332,
|
| 9349 |
+
"step": 1228
|
| 9350 |
+
},
|
| 9351 |
+
{
|
| 9352 |
+
"epoch": 0.9406812093379258,
|
| 9353 |
+
"grad_norm": 4.82579231262207,
|
| 9354 |
+
"learning_rate": 2.111742615328083e-07,
|
| 9355 |
+
"loss": 0.2132,
|
| 9356 |
+
"step": 1229
|
| 9357 |
+
},
|
| 9358 |
+
{
|
| 9359 |
+
"epoch": 0.9414466130884042,
|
| 9360 |
+
"grad_norm": 4.6144256591796875,
|
| 9361 |
+
"learning_rate": 2.057435977506028e-07,
|
| 9362 |
+
"loss": 0.2308,
|
| 9363 |
+
"step": 1230
|
| 9364 |
+
},
|
| 9365 |
+
{
|
| 9366 |
+
"epoch": 0.9422120168388826,
|
| 9367 |
+
"grad_norm": 10.00190258026123,
|
| 9368 |
+
"learning_rate": 2.0038294963413251e-07,
|
| 9369 |
+
"loss": 0.373,
|
| 9370 |
+
"step": 1231
|
| 9371 |
+
},
|
| 9372 |
+
{
|
| 9373 |
+
"epoch": 0.9429774205893608,
|
| 9374 |
+
"grad_norm": 5.754945755004883,
|
| 9375 |
+
"learning_rate": 1.9509235550477123e-07,
|
| 9376 |
+
"loss": 0.2395,
|
| 9377 |
+
"step": 1232
|
| 9378 |
+
},
|
| 9379 |
+
{
|
| 9380 |
+
"epoch": 0.9437428243398392,
|
| 9381 |
+
"grad_norm": 6.360520362854004,
|
| 9382 |
+
"learning_rate": 1.8987185318310009e-07,
|
| 9383 |
+
"loss": 0.1902,
|
| 9384 |
+
"step": 1233
|
| 9385 |
+
},
|
| 9386 |
+
{
|
| 9387 |
+
"epoch": 0.9445082280903176,
|
| 9388 |
+
"grad_norm": 9.590492248535156,
|
| 9389 |
+
"learning_rate": 1.8472147998863877e-07,
|
| 9390 |
+
"loss": 0.3155,
|
| 9391 |
+
"step": 1234
|
| 9392 |
+
},
|
| 9393 |
+
{
|
| 9394 |
+
"epoch": 0.945273631840796,
|
| 9395 |
+
"grad_norm": 7.996187686920166,
|
| 9396 |
+
"learning_rate": 1.796412727395802e-07,
|
| 9397 |
+
"loss": 0.3433,
|
| 9398 |
+
"step": 1235
|
| 9399 |
+
},
|
| 9400 |
+
{
|
| 9401 |
+
"epoch": 0.9460390355912744,
|
| 9402 |
+
"grad_norm": 4.422671794891357,
|
| 9403 |
+
"learning_rate": 1.7463126775252192e-07,
|
| 9404 |
+
"loss": 0.237,
|
| 9405 |
+
"step": 1236
|
| 9406 |
+
},
|
| 9407 |
+
{
|
| 9408 |
+
"epoch": 0.9468044393417527,
|
| 9409 |
+
"grad_norm": 6.761044979095459,
|
| 9410 |
+
"learning_rate": 1.6969150084221399e-07,
|
| 9411 |
+
"loss": 0.3662,
|
| 9412 |
+
"step": 1237
|
| 9413 |
+
},
|
| 9414 |
+
{
|
| 9415 |
+
"epoch": 0.9475698430922311,
|
| 9416 |
+
"grad_norm": 5.3165411949157715,
|
| 9417 |
+
"learning_rate": 1.6482200732129804e-07,
|
| 9418 |
+
"loss": 0.2149,
|
| 9419 |
+
"step": 1238
|
| 9420 |
+
},
|
| 9421 |
+
{
|
| 9422 |
+
"epoch": 0.9483352468427095,
|
| 9423 |
+
"grad_norm": 8.114785194396973,
|
| 9424 |
+
"learning_rate": 1.600228220000577e-07,
|
| 9425 |
+
"loss": 0.3416,
|
| 9426 |
+
"step": 1239
|
| 9427 |
+
},
|
| 9428 |
+
{
|
| 9429 |
+
"epoch": 0.9491006505931879,
|
| 9430 |
+
"grad_norm": 10.293120384216309,
|
| 9431 |
+
"learning_rate": 1.552939791861663e-07,
|
| 9432 |
+
"loss": 0.3409,
|
| 9433 |
+
"step": 1240
|
| 9434 |
+
},
|
| 9435 |
+
{
|
| 9436 |
+
"epoch": 0.9491006505931879,
|
| 9437 |
+
"eval_accuracy": 0.8880866425992779,
|
| 9438 |
+
"eval_f1": 0.8368421052631579,
|
| 9439 |
+
"eval_loss": 0.29501873254776,
|
| 9440 |
+
"eval_precision": 0.888268156424581,
|
| 9441 |
+
"eval_recall": 0.7910447761194029,
|
| 9442 |
+
"eval_runtime": 43.815,
|
| 9443 |
+
"eval_samples_per_second": 6.87,
|
| 9444 |
+
"eval_steps_per_second": 0.228,
|
| 9445 |
+
"step": 1240
|
| 9446 |
+
},
|
| 9447 |
+
{
|
| 9448 |
+
"epoch": 0.9498660543436663,
|
| 9449 |
+
"grad_norm": 6.4339799880981445,
|
| 9450 |
+
"learning_rate": 1.5063551268444275e-07,
|
| 9451 |
+
"loss": 0.3244,
|
| 9452 |
+
"step": 1241
|
| 9453 |
+
},
|
| 9454 |
+
{
|
| 9455 |
+
"epoch": 0.9506314580941446,
|
| 9456 |
+
"grad_norm": 5.49373722076416,
|
| 9457 |
+
"learning_rate": 1.4604745579661405e-07,
|
| 9458 |
+
"loss": 0.1764,
|
| 9459 |
+
"step": 1242
|
| 9460 |
+
},
|
| 9461 |
+
{
|
| 9462 |
+
"epoch": 0.951396861844623,
|
| 9463 |
+
"grad_norm": 6.4061126708984375,
|
| 9464 |
+
"learning_rate": 1.4152984132106972e-07,
|
| 9465 |
+
"loss": 0.3189,
|
| 9466 |
+
"step": 1243
|
| 9467 |
+
},
|
| 9468 |
+
{
|
| 9469 |
+
"epoch": 0.9521622655951014,
|
| 9470 |
+
"grad_norm": 5.936630725860596,
|
| 9471 |
+
"learning_rate": 1.370827015526355e-07,
|
| 9472 |
+
"loss": 0.3355,
|
| 9473 |
+
"step": 1244
|
| 9474 |
+
},
|
| 9475 |
+
{
|
| 9476 |
+
"epoch": 0.9529276693455798,
|
| 9477 |
+
"grad_norm": 14.100617408752441,
|
| 9478 |
+
"learning_rate": 1.3270606828233668e-07,
|
| 9479 |
+
"loss": 0.5053,
|
| 9480 |
+
"step": 1245
|
| 9481 |
+
},
|
| 9482 |
+
{
|
| 9483 |
+
"epoch": 0.9536930730960582,
|
| 9484 |
+
"grad_norm": 8.441110610961914,
|
| 9485 |
+
"learning_rate": 1.2839997279717075e-07,
|
| 9486 |
+
"loss": 0.274,
|
| 9487 |
+
"step": 1246
|
| 9488 |
+
},
|
| 9489 |
+
{
|
| 9490 |
+
"epoch": 0.9544584768465365,
|
| 9491 |
+
"grad_norm": 6.178558826446533,
|
| 9492 |
+
"learning_rate": 1.241644458798885e-07,
|
| 9493 |
+
"loss": 0.2966,
|
| 9494 |
+
"step": 1247
|
| 9495 |
+
},
|
| 9496 |
+
{
|
| 9497 |
+
"epoch": 0.9552238805970149,
|
| 9498 |
+
"grad_norm": 6.316476345062256,
|
| 9499 |
+
"learning_rate": 1.1999951780876872e-07,
|
| 9500 |
+
"loss": 0.2785,
|
| 9501 |
+
"step": 1248
|
| 9502 |
+
},
|
| 9503 |
+
{
|
| 9504 |
+
"epoch": 0.9559892843474933,
|
| 9505 |
+
"grad_norm": 6.520962238311768,
|
| 9506 |
+
"learning_rate": 1.159052183574072e-07,
|
| 9507 |
+
"loss": 0.2933,
|
| 9508 |
+
"step": 1249
|
| 9509 |
+
},
|
| 9510 |
+
{
|
| 9511 |
+
"epoch": 0.9567546880979717,
|
| 9512 |
+
"grad_norm": 6.651547431945801,
|
| 9513 |
+
"learning_rate": 1.1188157679449585e-07,
|
| 9514 |
+
"loss": 0.2775,
|
| 9515 |
+
"step": 1250
|
| 9516 |
+
},
|
| 9517 |
+
{
|
| 9518 |
+
"epoch": 0.9575200918484501,
|
| 9519 |
+
"grad_norm": 5.902339935302734,
|
| 9520 |
+
"learning_rate": 1.0792862188362396e-07,
|
| 9521 |
+
"loss": 0.2386,
|
| 9522 |
+
"step": 1251
|
| 9523 |
+
},
|
| 9524 |
+
{
|
| 9525 |
+
"epoch": 0.9582854955989284,
|
| 9526 |
+
"grad_norm": 7.483514308929443,
|
| 9527 |
+
"learning_rate": 1.0404638188306504e-07,
|
| 9528 |
+
"loss": 0.2501,
|
| 9529 |
+
"step": 1252
|
| 9530 |
+
},
|
| 9531 |
+
{
|
| 9532 |
+
"epoch": 0.9590508993494068,
|
| 9533 |
+
"grad_norm": 6.495910167694092,
|
| 9534 |
+
"learning_rate": 1.002348845455725e-07,
|
| 9535 |
+
"loss": 0.3872,
|
| 9536 |
+
"step": 1253
|
| 9537 |
+
},
|
| 9538 |
+
{
|
| 9539 |
+
"epoch": 0.9598163030998852,
|
| 9540 |
+
"grad_norm": 6.121851921081543,
|
| 9541 |
+
"learning_rate": 9.64941571181921e-08,
|
| 9542 |
+
"loss": 0.3186,
|
| 9543 |
+
"step": 1254
|
| 9544 |
+
},
|
| 9545 |
+
{
|
| 9546 |
+
"epoch": 0.9605817068503636,
|
| 9547 |
+
"grad_norm": 6.671183109283447,
|
| 9548 |
+
"learning_rate": 9.282422634205645e-08,
|
| 9549 |
+
"loss": 0.2947,
|
| 9550 |
+
"step": 1255
|
| 9551 |
+
},
|
| 9552 |
+
{
|
| 9553 |
+
"epoch": 0.961347110600842,
|
| 9554 |
+
"grad_norm": 5.844105243682861,
|
| 9555 |
+
"learning_rate": 8.922511845219972e-08,
|
| 9556 |
+
"loss": 0.2272,
|
| 9557 |
+
"step": 1256
|
| 9558 |
+
},
|
| 9559 |
+
{
|
| 9560 |
+
"epoch": 0.9621125143513203,
|
| 9561 |
+
"grad_norm": 6.843101501464844,
|
| 9562 |
+
"learning_rate": 8.569685917736659e-08,
|
| 9563 |
+
"loss": 0.2826,
|
| 9564 |
+
"step": 1257
|
| 9565 |
+
},
|
| 9566 |
+
{
|
| 9567 |
+
"epoch": 0.9628779181017987,
|
| 9568 |
+
"grad_norm": 6.810047626495361,
|
| 9569 |
+
"learning_rate": 8.223947373983354e-08,
|
| 9570 |
+
"loss": 0.2737,
|
| 9571 |
+
"step": 1258
|
| 9572 |
+
},
|
| 9573 |
+
{
|
| 9574 |
+
"epoch": 0.9636433218522771,
|
| 9575 |
+
"grad_norm": 6.269131660461426,
|
| 9576 |
+
"learning_rate": 7.885298685522235e-08,
|
| 9577 |
+
"loss": 0.3041,
|
| 9578 |
+
"step": 1259
|
| 9579 |
+
},
|
| 9580 |
+
{
|
| 9581 |
+
"epoch": 0.9644087256027555,
|
| 9582 |
+
"grad_norm": 7.05451774597168,
|
| 9583 |
+
"learning_rate": 7.553742273232578e-08,
|
| 9584 |
+
"loss": 0.3316,
|
| 9585 |
+
"step": 1260
|
| 9586 |
+
},
|
| 9587 |
+
{
|
| 9588 |
+
"epoch": 0.9644087256027555,
|
| 9589 |
+
"eval_accuracy": 0.8898916967509025,
|
| 9590 |
+
"eval_f1": 0.8390501319261213,
|
| 9591 |
+
"eval_loss": 0.2938833236694336,
|
| 9592 |
+
"eval_precision": 0.8932584269662921,
|
| 9593 |
+
"eval_recall": 0.7910447761194029,
|
| 9594 |
+
"eval_runtime": 43.817,
|
| 9595 |
+
"eval_samples_per_second": 6.869,
|
| 9596 |
+
"eval_steps_per_second": 0.228,
|
| 9597 |
+
"step": 1260
|
| 9598 |
+
},
|
| 9599 |
+
{
|
| 9600 |
+
"epoch": 0.9651741293532339,
|
| 9601 |
+
"grad_norm": 7.257000923156738,
|
| 9602 |
+
"learning_rate": 7.229280507293657e-08,
|
| 9603 |
+
"loss": 0.3027,
|
| 9604 |
+
"step": 1261
|
| 9605 |
+
},
|
| 9606 |
+
{
|
| 9607 |
+
"epoch": 0.9659395331037122,
|
| 9608 |
+
"grad_norm": 8.234956741333008,
|
| 9609 |
+
"learning_rate": 6.911915707167538e-08,
|
| 9610 |
+
"loss": 0.3549,
|
| 9611 |
+
"step": 1262
|
| 9612 |
+
},
|
| 9613 |
+
{
|
| 9614 |
+
"epoch": 0.9667049368541906,
|
| 9615 |
+
"grad_norm": 6.89831018447876,
|
| 9616 |
+
"learning_rate": 6.601650141582649e-08,
|
| 9617 |
+
"loss": 0.2276,
|
| 9618 |
+
"step": 1263
|
| 9619 |
+
},
|
| 9620 |
+
{
|
| 9621 |
+
"epoch": 0.967470340604669,
|
| 9622 |
+
"grad_norm": 5.264804840087891,
|
| 9623 |
+
"learning_rate": 6.29848602851768e-08,
|
| 9624 |
+
"loss": 0.2677,
|
| 9625 |
+
"step": 1264
|
| 9626 |
+
},
|
| 9627 |
+
{
|
| 9628 |
+
"epoch": 0.9682357443551474,
|
| 9629 |
+
"grad_norm": 7.13667631149292,
|
| 9630 |
+
"learning_rate": 6.002425535185041e-08,
|
| 9631 |
+
"loss": 0.3305,
|
| 9632 |
+
"step": 1265
|
| 9633 |
+
},
|
| 9634 |
+
{
|
| 9635 |
+
"epoch": 0.9690011481056258,
|
| 9636 |
+
"grad_norm": 5.207520008087158,
|
| 9637 |
+
"learning_rate": 5.713470778016539e-08,
|
| 9638 |
+
"loss": 0.2083,
|
| 9639 |
+
"step": 1266
|
| 9640 |
+
},
|
| 9641 |
+
{
|
| 9642 |
+
"epoch": 0.969766551856104,
|
| 9643 |
+
"grad_norm": 5.961206436157227,
|
| 9644 |
+
"learning_rate": 5.4316238226469476e-08,
|
| 9645 |
+
"loss": 0.2633,
|
| 9646 |
+
"step": 1267
|
| 9647 |
+
},
|
| 9648 |
+
{
|
| 9649 |
+
"epoch": 0.9705319556065825,
|
| 9650 |
+
"grad_norm": 11.930121421813965,
|
| 9651 |
+
"learning_rate": 5.1568866839003525e-08,
|
| 9652 |
+
"loss": 0.3997,
|
| 9653 |
+
"step": 1268
|
| 9654 |
+
},
|
| 9655 |
+
{
|
| 9656 |
+
"epoch": 0.9712973593570609,
|
| 9657 |
+
"grad_norm": 6.59713077545166,
|
| 9658 |
+
"learning_rate": 4.889261325775163e-08,
|
| 9659 |
+
"loss": 0.2437,
|
| 9660 |
+
"step": 1269
|
| 9661 |
+
},
|
| 9662 |
+
{
|
| 9663 |
+
"epoch": 0.9720627631075393,
|
| 9664 |
+
"grad_norm": 7.702863693237305,
|
| 9665 |
+
"learning_rate": 4.628749661430121e-08,
|
| 9666 |
+
"loss": 0.3456,
|
| 9667 |
+
"step": 1270
|
| 9668 |
+
},
|
| 9669 |
+
{
|
| 9670 |
+
"epoch": 0.9728281668580177,
|
| 9671 |
+
"grad_norm": 7.830643177032471,
|
| 9672 |
+
"learning_rate": 4.375353553170647e-08,
|
| 9673 |
+
"loss": 0.3608,
|
| 9674 |
+
"step": 1271
|
| 9675 |
+
},
|
| 9676 |
+
{
|
| 9677 |
+
"epoch": 0.9735935706084959,
|
| 9678 |
+
"grad_norm": 7.027949333190918,
|
| 9679 |
+
"learning_rate": 4.1290748124358513e-08,
|
| 9680 |
+
"loss": 0.2728,
|
| 9681 |
+
"step": 1272
|
| 9682 |
+
},
|
| 9683 |
+
{
|
| 9684 |
+
"epoch": 0.9743589743589743,
|
| 9685 |
+
"grad_norm": 9.216780662536621,
|
| 9686 |
+
"learning_rate": 3.889915199784877e-08,
|
| 9687 |
+
"loss": 0.3055,
|
| 9688 |
+
"step": 1273
|
| 9689 |
+
},
|
| 9690 |
+
{
|
| 9691 |
+
"epoch": 0.9751243781094527,
|
| 9692 |
+
"grad_norm": 5.373678684234619,
|
| 9693 |
+
"learning_rate": 3.657876424885243e-08,
|
| 9694 |
+
"loss": 0.2806,
|
| 9695 |
+
"step": 1274
|
| 9696 |
+
},
|
| 9697 |
+
{
|
| 9698 |
+
"epoch": 0.9758897818599311,
|
| 9699 |
+
"grad_norm": 6.474977970123291,
|
| 9700 |
+
"learning_rate": 3.432960146499631e-08,
|
| 9701 |
+
"loss": 0.3257,
|
| 9702 |
+
"step": 1275
|
| 9703 |
+
},
|
| 9704 |
+
{
|
| 9705 |
+
"epoch": 0.9766551856104095,
|
| 9706 |
+
"grad_norm": 8.3179292678833,
|
| 9707 |
+
"learning_rate": 3.2151679724748974e-08,
|
| 9708 |
+
"loss": 0.3389,
|
| 9709 |
+
"step": 1276
|
| 9710 |
+
},
|
| 9711 |
+
{
|
| 9712 |
+
"epoch": 0.9774205893608878,
|
| 9713 |
+
"grad_norm": 5.711795806884766,
|
| 9714 |
+
"learning_rate": 3.0045014597299695e-08,
|
| 9715 |
+
"loss": 0.2503,
|
| 9716 |
+
"step": 1277
|
| 9717 |
+
},
|
| 9718 |
+
{
|
| 9719 |
+
"epoch": 0.9781859931113662,
|
| 9720 |
+
"grad_norm": 5.385677337646484,
|
| 9721 |
+
"learning_rate": 2.800962114245076e-08,
|
| 9722 |
+
"loss": 0.2485,
|
| 9723 |
+
"step": 1278
|
| 9724 |
+
},
|
| 9725 |
+
{
|
| 9726 |
+
"epoch": 0.9789513968618446,
|
| 9727 |
+
"grad_norm": 3.9317917823791504,
|
| 9728 |
+
"learning_rate": 2.6045513910509802e-08,
|
| 9729 |
+
"loss": 0.212,
|
| 9730 |
+
"step": 1279
|
| 9731 |
+
},
|
| 9732 |
+
{
|
| 9733 |
+
"epoch": 0.979716800612323,
|
| 9734 |
+
"grad_norm": 4.621948719024658,
|
| 9735 |
+
"learning_rate": 2.415270694217986e-08,
|
| 9736 |
+
"loss": 0.1957,
|
| 9737 |
+
"step": 1280
|
| 9738 |
+
},
|
| 9739 |
+
{
|
| 9740 |
+
"epoch": 0.979716800612323,
|
| 9741 |
+
"eval_accuracy": 0.8898916967509025,
|
| 9742 |
+
"eval_f1": 0.8390501319261213,
|
| 9743 |
+
"eval_loss": 0.2945975959300995,
|
| 9744 |
+
"eval_precision": 0.8932584269662921,
|
| 9745 |
+
"eval_recall": 0.7910447761194029,
|
| 9746 |
+
"eval_runtime": 42.919,
|
| 9747 |
+
"eval_samples_per_second": 7.013,
|
| 9748 |
+
"eval_steps_per_second": 0.233,
|
| 9749 |
+
"step": 1280
|
| 9750 |
+
},
|
| 9751 |
+
{
|
| 9752 |
+
"epoch": 0.9804822043628014,
|
| 9753 |
+
"grad_norm": 6.141805648803711,
|
| 9754 |
+
"learning_rate": 2.2331213768468363e-08,
|
| 9755 |
+
"loss": 0.2438,
|
| 9756 |
+
"step": 1281
|
| 9757 |
+
},
|
| 9758 |
+
{
|
| 9759 |
+
"epoch": 0.9812476081132797,
|
| 9760 |
+
"grad_norm": 5.874077320098877,
|
| 9761 |
+
"learning_rate": 2.0581047410583865e-08,
|
| 9762 |
+
"loss": 0.343,
|
| 9763 |
+
"step": 1282
|
| 9764 |
+
},
|
| 9765 |
+
{
|
| 9766 |
+
"epoch": 0.9820130118637581,
|
| 9767 |
+
"grad_norm": 9.686785697937012,
|
| 9768 |
+
"learning_rate": 1.8902220379846125e-08,
|
| 9769 |
+
"loss": 0.4448,
|
| 9770 |
+
"step": 1283
|
| 9771 |
+
},
|
| 9772 |
+
{
|
| 9773 |
+
"epoch": 0.9827784156142365,
|
| 9774 |
+
"grad_norm": 6.589422225952148,
|
| 9775 |
+
"learning_rate": 1.7294744677591733e-08,
|
| 9776 |
+
"loss": 0.3774,
|
| 9777 |
+
"step": 1284
|
| 9778 |
+
},
|
| 9779 |
+
{
|
| 9780 |
+
"epoch": 0.9835438193647149,
|
| 9781 |
+
"grad_norm": 7.531107425689697,
|
| 9782 |
+
"learning_rate": 1.57586317950964e-08,
|
| 9783 |
+
"loss": 0.2591,
|
| 9784 |
+
"step": 1285
|
| 9785 |
+
},
|
| 9786 |
+
{
|
| 9787 |
+
"epoch": 0.9843092231151933,
|
| 9788 |
+
"grad_norm": 6.169864654541016,
|
| 9789 |
+
"learning_rate": 1.4293892713486135e-08,
|
| 9790 |
+
"loss": 0.3366,
|
| 9791 |
+
"step": 1286
|
| 9792 |
+
},
|
| 9793 |
+
{
|
| 9794 |
+
"epoch": 0.9850746268656716,
|
| 9795 |
+
"grad_norm": 7.703701496124268,
|
| 9796 |
+
"learning_rate": 1.2900537903660637e-08,
|
| 9797 |
+
"loss": 0.2595,
|
| 9798 |
+
"step": 1287
|
| 9799 |
+
},
|
| 9800 |
+
{
|
| 9801 |
+
"epoch": 0.98584003061615,
|
| 9802 |
+
"grad_norm": 5.90448522567749,
|
| 9803 |
+
"learning_rate": 1.157857732622003e-08,
|
| 9804 |
+
"loss": 0.2492,
|
| 9805 |
+
"step": 1288
|
| 9806 |
+
},
|
| 9807 |
+
{
|
| 9808 |
+
"epoch": 0.9866054343666284,
|
| 9809 |
+
"grad_norm": 5.025811672210693,
|
| 9810 |
+
"learning_rate": 1.0328020431391583e-08,
|
| 9811 |
+
"loss": 0.2422,
|
| 9812 |
+
"step": 1289
|
| 9813 |
+
},
|
| 9814 |
+
{
|
| 9815 |
+
"epoch": 0.9873708381171068,
|
| 9816 |
+
"grad_norm": 5.388332843780518,
|
| 9817 |
+
"learning_rate": 9.148876158961983e-09,
|
| 9818 |
+
"loss": 0.2482,
|
| 9819 |
+
"step": 1290
|
| 9820 |
+
},
|
| 9821 |
+
{
|
| 9822 |
+
"epoch": 0.9881362418675852,
|
| 9823 |
+
"grad_norm": 4.219669342041016,
|
| 9824 |
+
"learning_rate": 8.041152938216278e-09,
|
| 9825 |
+
"loss": 0.2682,
|
| 9826 |
+
"step": 1291
|
| 9827 |
+
},
|
| 9828 |
+
{
|
| 9829 |
+
"epoch": 0.9889016456180635,
|
| 9830 |
+
"grad_norm": 7.032052516937256,
|
| 9831 |
+
"learning_rate": 7.004858687874594e-09,
|
| 9832 |
+
"loss": 0.2261,
|
| 9833 |
+
"step": 1292
|
| 9834 |
+
},
|
| 9835 |
+
{
|
| 9836 |
+
"epoch": 0.9896670493685419,
|
| 9837 |
+
"grad_norm": 5.230202674865723,
|
| 9838 |
+
"learning_rate": 6.040000816037728e-09,
|
| 9839 |
+
"loss": 0.2749,
|
| 9840 |
+
"step": 1293
|
| 9841 |
+
},
|
| 9842 |
+
{
|
| 9843 |
+
"epoch": 0.9904324531190203,
|
| 9844 |
+
"grad_norm": 6.469751358032227,
|
| 9845 |
+
"learning_rate": 5.146586220131644e-09,
|
| 9846 |
+
"loss": 0.1947,
|
| 9847 |
+
"step": 1294
|
| 9848 |
+
},
|
| 9849 |
+
{
|
| 9850 |
+
"epoch": 0.9911978568694987,
|
| 9851 |
+
"grad_norm": 4.652950286865234,
|
| 9852 |
+
"learning_rate": 4.324621286861952e-09,
|
| 9853 |
+
"loss": 0.1941,
|
| 9854 |
+
"step": 1295
|
| 9855 |
+
},
|
| 9856 |
+
{
|
| 9857 |
+
"epoch": 0.9919632606199771,
|
| 9858 |
+
"grad_norm": 9.259235382080078,
|
| 9859 |
+
"learning_rate": 3.5741118921628346e-09,
|
| 9860 |
+
"loss": 0.2713,
|
| 9861 |
+
"step": 1296
|
| 9862 |
+
},
|
| 9863 |
+
{
|
| 9864 |
+
"epoch": 0.9927286643704554,
|
| 9865 |
+
"grad_norm": 6.85486364364624,
|
| 9866 |
+
"learning_rate": 2.895063401160414e-09,
|
| 9867 |
+
"loss": 0.3251,
|
| 9868 |
+
"step": 1297
|
| 9869 |
+
},
|
| 9870 |
+
{
|
| 9871 |
+
"epoch": 0.9934940681209338,
|
| 9872 |
+
"grad_norm": 9.239498138427734,
|
| 9873 |
+
"learning_rate": 2.2874806681305593e-09,
|
| 9874 |
+
"loss": 0.2696,
|
| 9875 |
+
"step": 1298
|
| 9876 |
+
},
|
| 9877 |
+
{
|
| 9878 |
+
"epoch": 0.9942594718714122,
|
| 9879 |
+
"grad_norm": 4.937226295471191,
|
| 9880 |
+
"learning_rate": 1.7513680364689145e-09,
|
| 9881 |
+
"loss": 0.2714,
|
| 9882 |
+
"step": 1299
|
| 9883 |
+
},
|
| 9884 |
+
{
|
| 9885 |
+
"epoch": 0.9950248756218906,
|
| 9886 |
+
"grad_norm": 8.691539764404297,
|
| 9887 |
+
"learning_rate": 1.2867293386531476e-09,
|
| 9888 |
+
"loss": 0.2439,
|
| 9889 |
+
"step": 1300
|
| 9890 |
+
},
|
| 9891 |
+
{
|
| 9892 |
+
"epoch": 0.9950248756218906,
|
| 9893 |
+
"eval_accuracy": 0.8898916967509025,
|
| 9894 |
+
"eval_f1": 0.8390501319261213,
|
| 9895 |
+
"eval_loss": 0.2946934700012207,
|
| 9896 |
+
"eval_precision": 0.8932584269662921,
|
| 9897 |
+
"eval_recall": 0.7910447761194029,
|
| 9898 |
+
"eval_runtime": 43.3576,
|
| 9899 |
+
"eval_samples_per_second": 6.942,
|
| 9900 |
+
"eval_steps_per_second": 0.231,
|
| 9901 |
+
"step": 1300
|
| 9902 |
}
|
| 9903 |
],
|
| 9904 |
"logging_steps": 1,
|
|
|
|
| 9918 |
"attributes": {}
|
| 9919 |
}
|
| 9920 |
},
|
| 9921 |
+
"total_flos": 1.9972530726187827e+17,
|
| 9922 |
"train_batch_size": 8,
|
| 9923 |
"trial_name": null,
|
| 9924 |
"trial_params": null
|