Training in progress, step 6600, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13934748
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d4fccc41669d8adadb54f68349f74f89ffff09966ac60dcb53a6e48cd78c003
|
| 3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13999412
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67369eaffaaf23fcc57a3579d2b350eecf84593e088e012b88be2cddfbf73336
|
| 3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b50419c39b978efc4f0a7211e73d09aa76109771056a53f0af1043bfa2a908e
|
| 3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:558bd7a1550e1f29246bbb3508f6e1aeea579c63ac91e9658afafb526206e361
|
| 3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b3c3c85375cb3b52f1d532892946383bc9042f73634efc9351ea34228856e5f
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc71fae38f9e58f7ed5e1e8ac6eae4e0afc3c45a3119840f87936211ac808bef
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8ed0d0c7fd248cf46be28fe84a80281716dee0a1579c90e502dfbf7a133a4db
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:318d0617050b5302b7b9fd244c0bcdb8dedde6e6db48bf8d3bfab29c9662237c
|
| 3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e8b1b52eaa5cc0adbc5ad547706bdc14a1c79b929a785b296eb1b0d394f8b5e5
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01f0a58e9f4a9804440e8394c58ad8351def40b4f77ca1177f17b91d40c5e86b
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6bcc5c1e32fe134cab8ae52b6ee4359379c0b414157c020ab3e06d21256e51f1
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90a32c65375a0b35f1aa52aca5fe27b9247b98c2cd81ac883e623d8b0225929b
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab30ef4bf6ec4e411aa77a20b4b6abd224f83b1f055386091808c7312483b117
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -49419,6 +49419,766 @@
|
|
| 49419 |
"eval_samples_per_second": 5.795,
|
| 49420 |
"eval_steps_per_second": 0.199,
|
| 49421 |
"step": 6500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49422 |
}
|
| 49423 |
],
|
| 49424 |
"logging_steps": 1,
|
|
@@ -49438,7 +50198,7 @@
|
|
| 49438 |
"attributes": {}
|
| 49439 |
}
|
| 49440 |
},
|
| 49441 |
-
"total_flos": 2.
|
| 49442 |
"train_batch_size": 8,
|
| 49443 |
"trial_name": null,
|
| 49444 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.9738104020656584,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 6600,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 49419 |
"eval_samples_per_second": 5.795,
|
| 49420 |
"eval_steps_per_second": 0.199,
|
| 49421 |
"step": 6500
|
| 49422 |
+
},
|
| 49423 |
+
{
|
| 49424 |
+
"epoch": 0.9592032460346736,
|
| 49425 |
+
"grad_norm": 1.29558265209198,
|
| 49426 |
+
"learning_rate": 1.0088789060987203e-07,
|
| 49427 |
+
"loss": 0.0439,
|
| 49428 |
+
"step": 6501
|
| 49429 |
+
},
|
| 49430 |
+
{
|
| 49431 |
+
"epoch": 0.9593507930652896,
|
| 49432 |
+
"grad_norm": 2.059356689453125,
|
| 49433 |
+
"learning_rate": 1.001593639838705e-07,
|
| 49434 |
+
"loss": 0.0357,
|
| 49435 |
+
"step": 6502
|
| 49436 |
+
},
|
| 49437 |
+
{
|
| 49438 |
+
"epoch": 0.9594983400959056,
|
| 49439 |
+
"grad_norm": 2.024535894393921,
|
| 49440 |
+
"learning_rate": 9.943346405906995e-08,
|
| 49441 |
+
"loss": 0.0594,
|
| 49442 |
+
"step": 6503
|
| 49443 |
+
},
|
| 49444 |
+
{
|
| 49445 |
+
"epoch": 0.9596458871265215,
|
| 49446 |
+
"grad_norm": 4.6369194984436035,
|
| 49447 |
+
"learning_rate": 9.871019102807078e-08,
|
| 49448 |
+
"loss": 0.032,
|
| 49449 |
+
"step": 6504
|
| 49450 |
+
},
|
| 49451 |
+
{
|
| 49452 |
+
"epoch": 0.9597934341571376,
|
| 49453 |
+
"grad_norm": 2.1632497310638428,
|
| 49454 |
+
"learning_rate": 9.798954508277836e-08,
|
| 49455 |
+
"loss": 0.0678,
|
| 49456 |
+
"step": 6505
|
| 49457 |
+
},
|
| 49458 |
+
{
|
| 49459 |
+
"epoch": 0.9599409811877536,
|
| 49460 |
+
"grad_norm": 1.632851004600525,
|
| 49461 |
+
"learning_rate": 9.727152641439863e-08,
|
| 49462 |
+
"loss": 0.0408,
|
| 49463 |
+
"step": 6506
|
| 49464 |
+
},
|
| 49465 |
+
{
|
| 49466 |
+
"epoch": 0.9600885282183697,
|
| 49467 |
+
"grad_norm": 2.7389848232269287,
|
| 49468 |
+
"learning_rate": 9.655613521344364e-08,
|
| 49469 |
+
"loss": 0.069,
|
| 49470 |
+
"step": 6507
|
| 49471 |
+
},
|
| 49472 |
+
{
|
| 49473 |
+
"epoch": 0.9602360752489856,
|
| 49474 |
+
"grad_norm": 1.2255512475967407,
|
| 49475 |
+
"learning_rate": 9.584337166972602e-08,
|
| 49476 |
+
"loss": 0.0368,
|
| 49477 |
+
"step": 6508
|
| 49478 |
+
},
|
| 49479 |
+
{
|
| 49480 |
+
"epoch": 0.9603836222796016,
|
| 49481 |
+
"grad_norm": 1.3319123983383179,
|
| 49482 |
+
"learning_rate": 9.513323597235891e-08,
|
| 49483 |
+
"loss": 0.0386,
|
| 49484 |
+
"step": 6509
|
| 49485 |
+
},
|
| 49486 |
+
{
|
| 49487 |
+
"epoch": 0.9605311693102176,
|
| 49488 |
+
"grad_norm": 1.6762546300888062,
|
| 49489 |
+
"learning_rate": 9.442572830976604e-08,
|
| 49490 |
+
"loss": 0.0321,
|
| 49491 |
+
"step": 6510
|
| 49492 |
+
},
|
| 49493 |
+
{
|
| 49494 |
+
"epoch": 0.9606787163408337,
|
| 49495 |
+
"grad_norm": 2.6606414318084717,
|
| 49496 |
+
"learning_rate": 9.372084886966392e-08,
|
| 49497 |
+
"loss": 0.0583,
|
| 49498 |
+
"step": 6511
|
| 49499 |
+
},
|
| 49500 |
+
{
|
| 49501 |
+
"epoch": 0.9608262633714496,
|
| 49502 |
+
"grad_norm": 3.4116148948669434,
|
| 49503 |
+
"learning_rate": 9.301859783907852e-08,
|
| 49504 |
+
"loss": 0.0465,
|
| 49505 |
+
"step": 6512
|
| 49506 |
+
},
|
| 49507 |
+
{
|
| 49508 |
+
"epoch": 0.9609738104020656,
|
| 49509 |
+
"grad_norm": 3.545358657836914,
|
| 49510 |
+
"learning_rate": 9.231897540433743e-08,
|
| 49511 |
+
"loss": 0.0457,
|
| 49512 |
+
"step": 6513
|
| 49513 |
+
},
|
| 49514 |
+
{
|
| 49515 |
+
"epoch": 0.9611213574326817,
|
| 49516 |
+
"grad_norm": 4.891258239746094,
|
| 49517 |
+
"learning_rate": 9.162198175106774e-08,
|
| 49518 |
+
"loss": 0.0335,
|
| 49519 |
+
"step": 6514
|
| 49520 |
+
},
|
| 49521 |
+
{
|
| 49522 |
+
"epoch": 0.9612689044632977,
|
| 49523 |
+
"grad_norm": 1.9941608905792236,
|
| 49524 |
+
"learning_rate": 9.092761706420261e-08,
|
| 49525 |
+
"loss": 0.0461,
|
| 49526 |
+
"step": 6515
|
| 49527 |
+
},
|
| 49528 |
+
{
|
| 49529 |
+
"epoch": 0.9614164514939136,
|
| 49530 |
+
"grad_norm": 2.0398828983306885,
|
| 49531 |
+
"learning_rate": 9.02358815279758e-08,
|
| 49532 |
+
"loss": 0.0509,
|
| 49533 |
+
"step": 6516
|
| 49534 |
+
},
|
| 49535 |
+
{
|
| 49536 |
+
"epoch": 0.9615639985245297,
|
| 49537 |
+
"grad_norm": 2.310847759246826,
|
| 49538 |
+
"learning_rate": 8.95467753259227e-08,
|
| 49539 |
+
"loss": 0.0463,
|
| 49540 |
+
"step": 6517
|
| 49541 |
+
},
|
| 49542 |
+
{
|
| 49543 |
+
"epoch": 0.9617115455551457,
|
| 49544 |
+
"grad_norm": 1.7646315097808838,
|
| 49545 |
+
"learning_rate": 8.886029864088375e-08,
|
| 49546 |
+
"loss": 0.0301,
|
| 49547 |
+
"step": 6518
|
| 49548 |
+
},
|
| 49549 |
+
{
|
| 49550 |
+
"epoch": 0.9618590925857617,
|
| 49551 |
+
"grad_norm": 2.1775879859924316,
|
| 49552 |
+
"learning_rate": 8.8176451655001e-08,
|
| 49553 |
+
"loss": 0.0298,
|
| 49554 |
+
"step": 6519
|
| 49555 |
+
},
|
| 49556 |
+
{
|
| 49557 |
+
"epoch": 0.9620066396163777,
|
| 49558 |
+
"grad_norm": 2.04054856300354,
|
| 49559 |
+
"learning_rate": 8.749523454971487e-08,
|
| 49560 |
+
"loss": 0.067,
|
| 49561 |
+
"step": 6520
|
| 49562 |
+
},
|
| 49563 |
+
{
|
| 49564 |
+
"epoch": 0.9620066396163777,
|
| 49565 |
+
"eval_accuracy": 0.9797395079594791,
|
| 49566 |
+
"eval_f1": 0.9653465346534653,
|
| 49567 |
+
"eval_loss": 0.05519821122288704,
|
| 49568 |
+
"eval_precision": 0.9848484848484849,
|
| 49569 |
+
"eval_recall": 0.9466019417475728,
|
| 49570 |
+
"eval_runtime": 51.0031,
|
| 49571 |
+
"eval_samples_per_second": 5.706,
|
| 49572 |
+
"eval_steps_per_second": 0.196,
|
| 49573 |
+
"step": 6520
|
| 49574 |
+
},
|
| 49575 |
+
{
|
| 49576 |
+
"epoch": 0.9621541866469937,
|
| 49577 |
+
"grad_norm": 1.858306884765625,
|
| 49578 |
+
"learning_rate": 8.681664750577413e-08,
|
| 49579 |
+
"loss": 0.0475,
|
| 49580 |
+
"step": 6521
|
| 49581 |
+
},
|
| 49582 |
+
{
|
| 49583 |
+
"epoch": 0.9623017336776097,
|
| 49584 |
+
"grad_norm": 1.6353979110717773,
|
| 49585 |
+
"learning_rate": 8.614069070322473e-08,
|
| 49586 |
+
"loss": 0.0516,
|
| 49587 |
+
"step": 6522
|
| 49588 |
+
},
|
| 49589 |
+
{
|
| 49590 |
+
"epoch": 0.9624492807082258,
|
| 49591 |
+
"grad_norm": 2.2677900791168213,
|
| 49592 |
+
"learning_rate": 8.546736432141656e-08,
|
| 49593 |
+
"loss": 0.0316,
|
| 49594 |
+
"step": 6523
|
| 49595 |
+
},
|
| 49596 |
+
{
|
| 49597 |
+
"epoch": 0.9625968277388418,
|
| 49598 |
+
"grad_norm": 2.1024563312530518,
|
| 49599 |
+
"learning_rate": 8.479666853900448e-08,
|
| 49600 |
+
"loss": 0.0266,
|
| 49601 |
+
"step": 6524
|
| 49602 |
+
},
|
| 49603 |
+
{
|
| 49604 |
+
"epoch": 0.9627443747694577,
|
| 49605 |
+
"grad_norm": 1.6655795574188232,
|
| 49606 |
+
"learning_rate": 8.412860353393947e-08,
|
| 49607 |
+
"loss": 0.0227,
|
| 49608 |
+
"step": 6525
|
| 49609 |
+
},
|
| 49610 |
+
{
|
| 49611 |
+
"epoch": 0.9628919218000738,
|
| 49612 |
+
"grad_norm": 3.0254976749420166,
|
| 49613 |
+
"learning_rate": 8.346316948347865e-08,
|
| 49614 |
+
"loss": 0.0635,
|
| 49615 |
+
"step": 6526
|
| 49616 |
+
},
|
| 49617 |
+
{
|
| 49618 |
+
"epoch": 0.9630394688306898,
|
| 49619 |
+
"grad_norm": 1.5075454711914062,
|
| 49620 |
+
"learning_rate": 8.280036656418078e-08,
|
| 49621 |
+
"loss": 0.0412,
|
| 49622 |
+
"step": 6527
|
| 49623 |
+
},
|
| 49624 |
+
{
|
| 49625 |
+
"epoch": 0.9631870158613058,
|
| 49626 |
+
"grad_norm": 1.4006476402282715,
|
| 49627 |
+
"learning_rate": 8.214019495190407e-08,
|
| 49628 |
+
"loss": 0.0334,
|
| 49629 |
+
"step": 6528
|
| 49630 |
+
},
|
| 49631 |
+
{
|
| 49632 |
+
"epoch": 0.9633345628919218,
|
| 49633 |
+
"grad_norm": 2.3075265884399414,
|
| 49634 |
+
"learning_rate": 8.148265482181173e-08,
|
| 49635 |
+
"loss": 0.0269,
|
| 49636 |
+
"step": 6529
|
| 49637 |
+
},
|
| 49638 |
+
{
|
| 49639 |
+
"epoch": 0.9634821099225378,
|
| 49640 |
+
"grad_norm": 1.0943922996520996,
|
| 49641 |
+
"learning_rate": 8.082774634836754e-08,
|
| 49642 |
+
"loss": 0.0353,
|
| 49643 |
+
"step": 6530
|
| 49644 |
+
},
|
| 49645 |
+
{
|
| 49646 |
+
"epoch": 0.9636296569531538,
|
| 49647 |
+
"grad_norm": 2.751830577850342,
|
| 49648 |
+
"learning_rate": 8.017546970533585e-08,
|
| 49649 |
+
"loss": 0.0331,
|
| 49650 |
+
"step": 6531
|
| 49651 |
+
},
|
| 49652 |
+
{
|
| 49653 |
+
"epoch": 0.9637772039837699,
|
| 49654 |
+
"grad_norm": 1.7253575325012207,
|
| 49655 |
+
"learning_rate": 7.952582506578487e-08,
|
| 49656 |
+
"loss": 0.0539,
|
| 49657 |
+
"step": 6532
|
| 49658 |
+
},
|
| 49659 |
+
{
|
| 49660 |
+
"epoch": 0.9639247510143858,
|
| 49661 |
+
"grad_norm": 2.3146719932556152,
|
| 49662 |
+
"learning_rate": 7.88788126020823e-08,
|
| 49663 |
+
"loss": 0.0372,
|
| 49664 |
+
"step": 6533
|
| 49665 |
+
},
|
| 49666 |
+
{
|
| 49667 |
+
"epoch": 0.9640722980450018,
|
| 49668 |
+
"grad_norm": 3.143972396850586,
|
| 49669 |
+
"learning_rate": 7.823443248589746e-08,
|
| 49670 |
+
"loss": 0.0955,
|
| 49671 |
+
"step": 6534
|
| 49672 |
+
},
|
| 49673 |
+
{
|
| 49674 |
+
"epoch": 0.9642198450756179,
|
| 49675 |
+
"grad_norm": 1.9397855997085571,
|
| 49676 |
+
"learning_rate": 7.759268488820471e-08,
|
| 49677 |
+
"loss": 0.0694,
|
| 49678 |
+
"step": 6535
|
| 49679 |
+
},
|
| 49680 |
+
{
|
| 49681 |
+
"epoch": 0.9643673921062339,
|
| 49682 |
+
"grad_norm": 2.692070484161377,
|
| 49683 |
+
"learning_rate": 7.695356997927561e-08,
|
| 49684 |
+
"loss": 0.0671,
|
| 49685 |
+
"step": 6536
|
| 49686 |
+
},
|
| 49687 |
+
{
|
| 49688 |
+
"epoch": 0.9645149391368498,
|
| 49689 |
+
"grad_norm": 5.281672954559326,
|
| 49690 |
+
"learning_rate": 7.631708792868453e-08,
|
| 49691 |
+
"loss": 0.0625,
|
| 49692 |
+
"step": 6537
|
| 49693 |
+
},
|
| 49694 |
+
{
|
| 49695 |
+
"epoch": 0.9646624861674659,
|
| 49696 |
+
"grad_norm": 2.2048943042755127,
|
| 49697 |
+
"learning_rate": 7.568323890530971e-08,
|
| 49698 |
+
"loss": 0.0905,
|
| 49699 |
+
"step": 6538
|
| 49700 |
+
},
|
| 49701 |
+
{
|
| 49702 |
+
"epoch": 0.9648100331980819,
|
| 49703 |
+
"grad_norm": 1.996284008026123,
|
| 49704 |
+
"learning_rate": 7.505202307732774e-08,
|
| 49705 |
+
"loss": 0.0426,
|
| 49706 |
+
"step": 6539
|
| 49707 |
+
},
|
| 49708 |
+
{
|
| 49709 |
+
"epoch": 0.9649575802286979,
|
| 49710 |
+
"grad_norm": 2.583498001098633,
|
| 49711 |
+
"learning_rate": 7.442344061221684e-08,
|
| 49712 |
+
"loss": 0.0626,
|
| 49713 |
+
"step": 6540
|
| 49714 |
+
},
|
| 49715 |
+
{
|
| 49716 |
+
"epoch": 0.9649575802286979,
|
| 49717 |
+
"eval_accuracy": 0.9782923299565847,
|
| 49718 |
+
"eval_f1": 0.9629629629629629,
|
| 49719 |
+
"eval_loss": 0.05510440468788147,
|
| 49720 |
+
"eval_precision": 0.9798994974874372,
|
| 49721 |
+
"eval_recall": 0.9466019417475728,
|
| 49722 |
+
"eval_runtime": 49.8894,
|
| 49723 |
+
"eval_samples_per_second": 5.833,
|
| 49724 |
+
"eval_steps_per_second": 0.2,
|
| 49725 |
+
"step": 6540
|
| 49726 |
+
},
|
| 49727 |
+
{
|
| 49728 |
+
"epoch": 0.9651051272593139,
|
| 49729 |
+
"grad_norm": 1.578550934791565,
|
| 49730 |
+
"learning_rate": 7.37974916767581e-08,
|
| 49731 |
+
"loss": 0.0493,
|
| 49732 |
+
"step": 6541
|
| 49733 |
+
},
|
| 49734 |
+
{
|
| 49735 |
+
"epoch": 0.9652526742899299,
|
| 49736 |
+
"grad_norm": 4.190537452697754,
|
| 49737 |
+
"learning_rate": 7.317417643703417e-08,
|
| 49738 |
+
"loss": 0.0503,
|
| 49739 |
+
"step": 6542
|
| 49740 |
+
},
|
| 49741 |
+
{
|
| 49742 |
+
"epoch": 0.9654002213205459,
|
| 49743 |
+
"grad_norm": 2.9617536067962646,
|
| 49744 |
+
"learning_rate": 7.255349505842502e-08,
|
| 49745 |
+
"loss": 0.1048,
|
| 49746 |
+
"step": 6543
|
| 49747 |
+
},
|
| 49748 |
+
{
|
| 49749 |
+
"epoch": 0.965547768351162,
|
| 49750 |
+
"grad_norm": 2.5437979698181152,
|
| 49751 |
+
"learning_rate": 7.193544770561777e-08,
|
| 49752 |
+
"loss": 0.0317,
|
| 49753 |
+
"step": 6544
|
| 49754 |
+
},
|
| 49755 |
+
{
|
| 49756 |
+
"epoch": 0.965695315381778,
|
| 49757 |
+
"grad_norm": 1.3040310144424438,
|
| 49758 |
+
"learning_rate": 7.132003454259461e-08,
|
| 49759 |
+
"loss": 0.0471,
|
| 49760 |
+
"step": 6545
|
| 49761 |
+
},
|
| 49762 |
+
{
|
| 49763 |
+
"epoch": 0.9658428624123939,
|
| 49764 |
+
"grad_norm": 2.7270450592041016,
|
| 49765 |
+
"learning_rate": 7.07072557326438e-08,
|
| 49766 |
+
"loss": 0.0799,
|
| 49767 |
+
"step": 6546
|
| 49768 |
+
},
|
| 49769 |
+
{
|
| 49770 |
+
"epoch": 0.96599040944301,
|
| 49771 |
+
"grad_norm": 2.9314723014831543,
|
| 49772 |
+
"learning_rate": 7.009711143835197e-08,
|
| 49773 |
+
"loss": 0.0726,
|
| 49774 |
+
"step": 6547
|
| 49775 |
+
},
|
| 49776 |
+
{
|
| 49777 |
+
"epoch": 0.966137956473626,
|
| 49778 |
+
"grad_norm": 1.3996440172195435,
|
| 49779 |
+
"learning_rate": 6.948960182160624e-08,
|
| 49780 |
+
"loss": 0.0272,
|
| 49781 |
+
"step": 6548
|
| 49782 |
+
},
|
| 49783 |
+
{
|
| 49784 |
+
"epoch": 0.966285503504242,
|
| 49785 |
+
"grad_norm": 3.6665632724761963,
|
| 49786 |
+
"learning_rate": 6.888472704359661e-08,
|
| 49787 |
+
"loss": 0.0339,
|
| 49788 |
+
"step": 6549
|
| 49789 |
+
},
|
| 49790 |
+
{
|
| 49791 |
+
"epoch": 0.966433050534858,
|
| 49792 |
+
"grad_norm": 5.509788513183594,
|
| 49793 |
+
"learning_rate": 6.828248726481357e-08,
|
| 49794 |
+
"loss": 0.0541,
|
| 49795 |
+
"step": 6550
|
| 49796 |
+
},
|
| 49797 |
+
{
|
| 49798 |
+
"epoch": 0.966580597565474,
|
| 49799 |
+
"grad_norm": 1.3570910692214966,
|
| 49800 |
+
"learning_rate": 6.768288264504597e-08,
|
| 49801 |
+
"loss": 0.0237,
|
| 49802 |
+
"step": 6551
|
| 49803 |
+
},
|
| 49804 |
+
{
|
| 49805 |
+
"epoch": 0.96672814459609,
|
| 49806 |
+
"grad_norm": 4.248430252075195,
|
| 49807 |
+
"learning_rate": 6.708591334338655e-08,
|
| 49808 |
+
"loss": 0.093,
|
| 49809 |
+
"step": 6552
|
| 49810 |
+
},
|
| 49811 |
+
{
|
| 49812 |
+
"epoch": 0.9668756916267061,
|
| 49813 |
+
"grad_norm": 2.7404448986053467,
|
| 49814 |
+
"learning_rate": 6.649157951822859e-08,
|
| 49815 |
+
"loss": 0.1065,
|
| 49816 |
+
"step": 6553
|
| 49817 |
+
},
|
| 49818 |
+
{
|
| 49819 |
+
"epoch": 0.967023238657322,
|
| 49820 |
+
"grad_norm": 3.5840132236480713,
|
| 49821 |
+
"learning_rate": 6.589988132726488e-08,
|
| 49822 |
+
"loss": 0.069,
|
| 49823 |
+
"step": 6554
|
| 49824 |
+
},
|
| 49825 |
+
{
|
| 49826 |
+
"epoch": 0.967170785687938,
|
| 49827 |
+
"grad_norm": 1.6288326978683472,
|
| 49828 |
+
"learning_rate": 6.53108189274887e-08,
|
| 49829 |
+
"loss": 0.0352,
|
| 49830 |
+
"step": 6555
|
| 49831 |
+
},
|
| 49832 |
+
{
|
| 49833 |
+
"epoch": 0.967318332718554,
|
| 49834 |
+
"grad_norm": 2.5016865730285645,
|
| 49835 |
+
"learning_rate": 6.472439247519502e-08,
|
| 49836 |
+
"loss": 0.043,
|
| 49837 |
+
"step": 6556
|
| 49838 |
+
},
|
| 49839 |
+
{
|
| 49840 |
+
"epoch": 0.9674658797491701,
|
| 49841 |
+
"grad_norm": 3.6651065349578857,
|
| 49842 |
+
"learning_rate": 6.414060212597939e-08,
|
| 49843 |
+
"loss": 0.1105,
|
| 49844 |
+
"step": 6557
|
| 49845 |
+
},
|
| 49846 |
+
{
|
| 49847 |
+
"epoch": 0.967613426779786,
|
| 49848 |
+
"grad_norm": 3.1000683307647705,
|
| 49849 |
+
"learning_rate": 6.35594480347368e-08,
|
| 49850 |
+
"loss": 0.0607,
|
| 49851 |
+
"step": 6558
|
| 49852 |
+
},
|
| 49853 |
+
{
|
| 49854 |
+
"epoch": 0.967760973810402,
|
| 49855 |
+
"grad_norm": 2.166266441345215,
|
| 49856 |
+
"learning_rate": 6.298093035566278e-08,
|
| 49857 |
+
"loss": 0.057,
|
| 49858 |
+
"step": 6559
|
| 49859 |
+
},
|
| 49860 |
+
{
|
| 49861 |
+
"epoch": 0.9679085208410181,
|
| 49862 |
+
"grad_norm": 1.484336495399475,
|
| 49863 |
+
"learning_rate": 6.240504924225566e-08,
|
| 49864 |
+
"loss": 0.0275,
|
| 49865 |
+
"step": 6560
|
| 49866 |
+
},
|
| 49867 |
+
{
|
| 49868 |
+
"epoch": 0.9679085208410181,
|
| 49869 |
+
"eval_accuracy": 0.9782923299565847,
|
| 49870 |
+
"eval_f1": 0.9629629629629629,
|
| 49871 |
+
"eval_loss": 0.05525950714945793,
|
| 49872 |
+
"eval_precision": 0.9798994974874372,
|
| 49873 |
+
"eval_recall": 0.9466019417475728,
|
| 49874 |
+
"eval_runtime": 51.4712,
|
| 49875 |
+
"eval_samples_per_second": 5.654,
|
| 49876 |
+
"eval_steps_per_second": 0.194,
|
| 49877 |
+
"step": 6560
|
| 49878 |
+
},
|
| 49879 |
+
{
|
| 49880 |
+
"epoch": 0.9680560678716341,
|
| 49881 |
+
"grad_norm": 3.557420015335083,
|
| 49882 |
+
"learning_rate": 6.183180484731211e-08,
|
| 49883 |
+
"loss": 0.0435,
|
| 49884 |
+
"step": 6561
|
| 49885 |
+
},
|
| 49886 |
+
{
|
| 49887 |
+
"epoch": 0.96820361490225,
|
| 49888 |
+
"grad_norm": 1.5809874534606934,
|
| 49889 |
+
"learning_rate": 6.126119732292935e-08,
|
| 49890 |
+
"loss": 0.0234,
|
| 49891 |
+
"step": 6562
|
| 49892 |
+
},
|
| 49893 |
+
{
|
| 49894 |
+
"epoch": 0.9683511619328661,
|
| 49895 |
+
"grad_norm": 1.9705631732940674,
|
| 49896 |
+
"learning_rate": 6.069322682050516e-08,
|
| 49897 |
+
"loss": 0.0528,
|
| 49898 |
+
"step": 6563
|
| 49899 |
+
},
|
| 49900 |
+
{
|
| 49901 |
+
"epoch": 0.9684987089634821,
|
| 49902 |
+
"grad_norm": 1.1486409902572632,
|
| 49903 |
+
"learning_rate": 6.0127893490739e-08,
|
| 49904 |
+
"loss": 0.0193,
|
| 49905 |
+
"step": 6564
|
| 49906 |
+
},
|
| 49907 |
+
{
|
| 49908 |
+
"epoch": 0.9686462559940981,
|
| 49909 |
+
"grad_norm": 2.3699090480804443,
|
| 49910 |
+
"learning_rate": 5.956519748362755e-08,
|
| 49911 |
+
"loss": 0.1015,
|
| 49912 |
+
"step": 6565
|
| 49913 |
+
},
|
| 49914 |
+
{
|
| 49915 |
+
"epoch": 0.9687938030247141,
|
| 49916 |
+
"grad_norm": 1.836517095565796,
|
| 49917 |
+
"learning_rate": 5.900513894847027e-08,
|
| 49918 |
+
"loss": 0.0327,
|
| 49919 |
+
"step": 6566
|
| 49920 |
+
},
|
| 49921 |
+
{
|
| 49922 |
+
"epoch": 0.9689413500553301,
|
| 49923 |
+
"grad_norm": 6.853978633880615,
|
| 49924 |
+
"learning_rate": 5.8447718033868286e-08,
|
| 49925 |
+
"loss": 0.0787,
|
| 49926 |
+
"step": 6567
|
| 49927 |
+
},
|
| 49928 |
+
{
|
| 49929 |
+
"epoch": 0.9690888970859461,
|
| 49930 |
+
"grad_norm": 5.050530910491943,
|
| 49931 |
+
"learning_rate": 5.7892934887717746e-08,
|
| 49932 |
+
"loss": 0.1619,
|
| 49933 |
+
"step": 6568
|
| 49934 |
+
},
|
| 49935 |
+
{
|
| 49936 |
+
"epoch": 0.9692364441165622,
|
| 49937 |
+
"grad_norm": 1.7064549922943115,
|
| 49938 |
+
"learning_rate": 5.734078965721867e-08,
|
| 49939 |
+
"loss": 0.0448,
|
| 49940 |
+
"step": 6569
|
| 49941 |
+
},
|
| 49942 |
+
{
|
| 49943 |
+
"epoch": 0.9693839911471782,
|
| 49944 |
+
"grad_norm": 2.607844114303589,
|
| 49945 |
+
"learning_rate": 5.679128248887167e-08,
|
| 49946 |
+
"loss": 0.0592,
|
| 49947 |
+
"step": 6570
|
| 49948 |
+
},
|
| 49949 |
+
{
|
| 49950 |
+
"epoch": 0.9695315381777941,
|
| 49951 |
+
"grad_norm": 1.541534423828125,
|
| 49952 |
+
"learning_rate": 5.624441352847565e-08,
|
| 49953 |
+
"loss": 0.0363,
|
| 49954 |
+
"step": 6571
|
| 49955 |
+
},
|
| 49956 |
+
{
|
| 49957 |
+
"epoch": 0.9696790852084102,
|
| 49958 |
+
"grad_norm": 4.141221046447754,
|
| 49959 |
+
"learning_rate": 5.5700182921128995e-08,
|
| 49960 |
+
"loss": 0.0448,
|
| 49961 |
+
"step": 6572
|
| 49962 |
+
},
|
| 49963 |
+
{
|
| 49964 |
+
"epoch": 0.9698266322390262,
|
| 49965 |
+
"grad_norm": 2.9067554473876953,
|
| 49966 |
+
"learning_rate": 5.515859081123287e-08,
|
| 49967 |
+
"loss": 0.1044,
|
| 49968 |
+
"step": 6573
|
| 49969 |
+
},
|
| 49970 |
+
{
|
| 49971 |
+
"epoch": 0.9699741792696422,
|
| 49972 |
+
"grad_norm": 4.378636360168457,
|
| 49973 |
+
"learning_rate": 5.461963734248565e-08,
|
| 49974 |
+
"loss": 0.0584,
|
| 49975 |
+
"step": 6574
|
| 49976 |
+
},
|
| 49977 |
+
{
|
| 49978 |
+
"epoch": 0.9701217263002582,
|
| 49979 |
+
"grad_norm": 2.6484222412109375,
|
| 49980 |
+
"learning_rate": 5.4083322657886293e-08,
|
| 49981 |
+
"loss": 0.0497,
|
| 49982 |
+
"step": 6575
|
| 49983 |
+
},
|
| 49984 |
+
{
|
| 49985 |
+
"epoch": 0.9702692733308742,
|
| 49986 |
+
"grad_norm": 1.6235097646713257,
|
| 49987 |
+
"learning_rate": 5.3549646899733186e-08,
|
| 49988 |
+
"loss": 0.0712,
|
| 49989 |
+
"step": 6576
|
| 49990 |
+
},
|
| 49991 |
+
{
|
| 49992 |
+
"epoch": 0.9704168203614902,
|
| 49993 |
+
"grad_norm": 5.442991733551025,
|
| 49994 |
+
"learning_rate": 5.301861020962751e-08,
|
| 49995 |
+
"loss": 0.0672,
|
| 49996 |
+
"step": 6577
|
| 49997 |
+
},
|
| 49998 |
+
{
|
| 49999 |
+
"epoch": 0.9705643673921063,
|
| 50000 |
+
"grad_norm": 1.869346022605896,
|
| 50001 |
+
"learning_rate": 5.249021272846766e-08,
|
| 50002 |
+
"loss": 0.0376,
|
| 50003 |
+
"step": 6578
|
| 50004 |
+
},
|
| 50005 |
+
{
|
| 50006 |
+
"epoch": 0.9707119144227222,
|
| 50007 |
+
"grad_norm": 1.3902775049209595,
|
| 50008 |
+
"learning_rate": 5.1964454596450387e-08,
|
| 50009 |
+
"loss": 0.0533,
|
| 50010 |
+
"step": 6579
|
| 50011 |
+
},
|
| 50012 |
+
{
|
| 50013 |
+
"epoch": 0.9708594614533382,
|
| 50014 |
+
"grad_norm": 1.305187463760376,
|
| 50015 |
+
"learning_rate": 5.14413359530741e-08,
|
| 50016 |
+
"loss": 0.0196,
|
| 50017 |
+
"step": 6580
|
| 50018 |
+
},
|
| 50019 |
+
{
|
| 50020 |
+
"epoch": 0.9708594614533382,
|
| 50021 |
+
"eval_accuracy": 0.9782923299565847,
|
| 50022 |
+
"eval_f1": 0.9629629629629629,
|
| 50023 |
+
"eval_loss": 0.0551002100110054,
|
| 50024 |
+
"eval_precision": 0.9798994974874372,
|
| 50025 |
+
"eval_recall": 0.9466019417475728,
|
| 50026 |
+
"eval_runtime": 50.6556,
|
| 50027 |
+
"eval_samples_per_second": 5.745,
|
| 50028 |
+
"eval_steps_per_second": 0.197,
|
| 50029 |
+
"step": 6580
|
| 50030 |
+
},
|
| 50031 |
+
{
|
| 50032 |
+
"epoch": 0.9710070084839543,
|
| 50033 |
+
"grad_norm": 1.7406283617019653,
|
| 50034 |
+
"learning_rate": 5.0920856937137775e-08,
|
| 50035 |
+
"loss": 0.0479,
|
| 50036 |
+
"step": 6581
|
| 50037 |
+
},
|
| 50038 |
+
{
|
| 50039 |
+
"epoch": 0.9711545555145703,
|
| 50040 |
+
"grad_norm": 1.1636487245559692,
|
| 50041 |
+
"learning_rate": 5.040301768673761e-08,
|
| 50042 |
+
"loss": 0.0181,
|
| 50043 |
+
"step": 6582
|
| 50044 |
+
},
|
| 50045 |
+
{
|
| 50046 |
+
"epoch": 0.9713021025451862,
|
| 50047 |
+
"grad_norm": 3.7341973781585693,
|
| 50048 |
+
"learning_rate": 4.9887818339272586e-08,
|
| 50049 |
+
"loss": 0.0836,
|
| 50050 |
+
"step": 6583
|
| 50051 |
+
},
|
| 50052 |
+
{
|
| 50053 |
+
"epoch": 0.9714496495758023,
|
| 50054 |
+
"grad_norm": 2.6138193607330322,
|
| 50055 |
+
"learning_rate": 4.93752590314367e-08,
|
| 50056 |
+
"loss": 0.0552,
|
| 50057 |
+
"step": 6584
|
| 50058 |
+
},
|
| 50059 |
+
{
|
| 50060 |
+
"epoch": 0.9715971966064183,
|
| 50061 |
+
"grad_norm": 9.698671340942383,
|
| 50062 |
+
"learning_rate": 4.886533989922781e-08,
|
| 50063 |
+
"loss": 0.0285,
|
| 50064 |
+
"step": 6585
|
| 50065 |
+
},
|
| 50066 |
+
{
|
| 50067 |
+
"epoch": 0.9717447436370343,
|
| 50068 |
+
"grad_norm": 2.1348674297332764,
|
| 50069 |
+
"learning_rate": 4.8358061077942163e-08,
|
| 50070 |
+
"loss": 0.0314,
|
| 50071 |
+
"step": 6586
|
| 50072 |
+
},
|
| 50073 |
+
{
|
| 50074 |
+
"epoch": 0.9718922906676503,
|
| 50075 |
+
"grad_norm": 1.8118903636932373,
|
| 50076 |
+
"learning_rate": 4.785342270217319e-08,
|
| 50077 |
+
"loss": 0.0358,
|
| 50078 |
+
"step": 6587
|
| 50079 |
+
},
|
| 50080 |
+
{
|
| 50081 |
+
"epoch": 0.9720398376982663,
|
| 50082 |
+
"grad_norm": 1.8762260675430298,
|
| 50083 |
+
"learning_rate": 4.735142490581601e-08,
|
| 50084 |
+
"loss": 0.0485,
|
| 50085 |
+
"step": 6588
|
| 50086 |
+
},
|
| 50087 |
+
{
|
| 50088 |
+
"epoch": 0.9721873847288823,
|
| 50089 |
+
"grad_norm": 2.3938488960266113,
|
| 50090 |
+
"learning_rate": 4.6852067822065195e-08,
|
| 50091 |
+
"loss": 0.0594,
|
| 50092 |
+
"step": 6589
|
| 50093 |
+
},
|
| 50094 |
+
{
|
| 50095 |
+
"epoch": 0.9723349317594984,
|
| 50096 |
+
"grad_norm": 2.4276046752929688,
|
| 50097 |
+
"learning_rate": 4.6355351583412534e-08,
|
| 50098 |
+
"loss": 0.0807,
|
| 50099 |
+
"step": 6590
|
| 50100 |
+
},
|
| 50101 |
+
{
|
| 50102 |
+
"epoch": 0.9724824787901144,
|
| 50103 |
+
"grad_norm": 1.4129104614257812,
|
| 50104 |
+
"learning_rate": 4.5861276321651495e-08,
|
| 50105 |
+
"loss": 0.0256,
|
| 50106 |
+
"step": 6591
|
| 50107 |
+
},
|
| 50108 |
+
{
|
| 50109 |
+
"epoch": 0.9726300258207303,
|
| 50110 |
+
"grad_norm": 2.190324544906616,
|
| 50111 |
+
"learning_rate": 4.5369842167874986e-08,
|
| 50112 |
+
"loss": 0.033,
|
| 50113 |
+
"step": 6592
|
| 50114 |
+
},
|
| 50115 |
+
{
|
| 50116 |
+
"epoch": 0.9727775728513464,
|
| 50117 |
+
"grad_norm": 2.735649347305298,
|
| 50118 |
+
"learning_rate": 4.4881049252472056e-08,
|
| 50119 |
+
"loss": 0.055,
|
| 50120 |
+
"step": 6593
|
| 50121 |
+
},
|
| 50122 |
+
{
|
| 50123 |
+
"epoch": 0.9729251198819624,
|
| 50124 |
+
"grad_norm": 1.2812883853912354,
|
| 50125 |
+
"learning_rate": 4.439489770513339e-08,
|
| 50126 |
+
"loss": 0.0378,
|
| 50127 |
+
"step": 6594
|
| 50128 |
+
},
|
| 50129 |
+
{
|
| 50130 |
+
"epoch": 0.9730726669125784,
|
| 50131 |
+
"grad_norm": 1.0837053060531616,
|
| 50132 |
+
"learning_rate": 4.391138765484915e-08,
|
| 50133 |
+
"loss": 0.0207,
|
| 50134 |
+
"step": 6595
|
| 50135 |
+
},
|
| 50136 |
+
{
|
| 50137 |
+
"epoch": 0.9732202139431944,
|
| 50138 |
+
"grad_norm": 2.2297704219818115,
|
| 50139 |
+
"learning_rate": 4.343051922990782e-08,
|
| 50140 |
+
"loss": 0.0474,
|
| 50141 |
+
"step": 6596
|
| 50142 |
+
},
|
| 50143 |
+
{
|
| 50144 |
+
"epoch": 0.9733677609738104,
|
| 50145 |
+
"grad_norm": 4.039227485656738,
|
| 50146 |
+
"learning_rate": 4.295229255789623e-08,
|
| 50147 |
+
"loss": 0.058,
|
| 50148 |
+
"step": 6597
|
| 50149 |
+
},
|
| 50150 |
+
{
|
| 50151 |
+
"epoch": 0.9735153080044264,
|
| 50152 |
+
"grad_norm": 3.033900737762451,
|
| 50153 |
+
"learning_rate": 4.247670776570178e-08,
|
| 50154 |
+
"loss": 0.0861,
|
| 50155 |
+
"step": 6598
|
| 50156 |
+
},
|
| 50157 |
+
{
|
| 50158 |
+
"epoch": 0.9736628550350425,
|
| 50159 |
+
"grad_norm": 2.549617052078247,
|
| 50160 |
+
"learning_rate": 4.200376497951131e-08,
|
| 50161 |
+
"loss": 0.1002,
|
| 50162 |
+
"step": 6599
|
| 50163 |
+
},
|
| 50164 |
+
{
|
| 50165 |
+
"epoch": 0.9738104020656584,
|
| 50166 |
+
"grad_norm": 2.654305934906006,
|
| 50167 |
+
"learning_rate": 4.153346432480776e-08,
|
| 50168 |
+
"loss": 0.0986,
|
| 50169 |
+
"step": 6600
|
| 50170 |
+
},
|
| 50171 |
+
{
|
| 50172 |
+
"epoch": 0.9738104020656584,
|
| 50173 |
+
"eval_accuracy": 0.9782923299565847,
|
| 50174 |
+
"eval_f1": 0.9629629629629629,
|
| 50175 |
+
"eval_loss": 0.05549389496445656,
|
| 50176 |
+
"eval_precision": 0.9798994974874372,
|
| 50177 |
+
"eval_recall": 0.9466019417475728,
|
| 50178 |
+
"eval_runtime": 50.8482,
|
| 50179 |
+
"eval_samples_per_second": 5.723,
|
| 50180 |
+
"eval_steps_per_second": 0.197,
|
| 50181 |
+
"step": 6600
|
| 50182 |
}
|
| 50183 |
],
|
| 50184 |
"logging_steps": 1,
|
|
|
|
| 50198 |
"attributes": {}
|
| 50199 |
}
|
| 50200 |
},
|
| 50201 |
+
"total_flos": 2.0335814788555735e+18,
|
| 50202 |
"train_batch_size": 8,
|
| 50203 |
"trial_name": null,
|
| 50204 |
"trial_params": null
|