Training in progress, step 6000, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13934748
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:407cea8cd4c1444b6fd3dbbc1796efb64886678cd52d2935445d4ee150b19cd9
|
| 3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13999412
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f09e4f286d588fdd8dee70e7788283d8f82c437d873e13a263f824d89ba1dc09
|
| 3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ce1f760bbd4c96a2756283dc0ed0049eaa28a856cc915b2efea1a4cad775044
|
| 3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4119461e04c64bd9cb35fc4677eb47b0256885eb2bf830e5e575de68f0787410
|
| 3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1789239bff9adb9c6876b4d099f2ed19463d2be8a749c02ae1a04bf9c4fab87a
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4821bd33219546f03dfe0ef15028c7679b8d9837b37430def9e4de554b5dc22a
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7914c18071ba453e15120e4e8596755dd9d2166fc0ded479a8498bd53bfc83d
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dfaec33f43af8375c51ba9ca0f8679ccb2f8f39889358a6c520af5ba2029ceed
|
| 3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c51cd242b6ad96b1a7bd50ac0129e12f629372d44073ce6176ca7a37443f9b6
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67dc8d7c29a337d2af8cab636481f46a6a24034554d74820938adde6717b070b
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:480547ac130fa2a4d7ed2c72cff8ffd28b33c257079ad7f33a9553e30ee18b86
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93d2249e6619e5c532aedb71a6fa0b27cb8510666f06ef4286647cbebdeb62f8
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4055e4142f36e5b7ad8acd183073cd010060ffca6c79c7221bfc55a921e1e477
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -44859,6 +44859,766 @@
|
|
| 44859 |
"eval_samples_per_second": 5.928,
|
| 44860 |
"eval_steps_per_second": 0.204,
|
| 44861 |
"step": 5900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44862 |
}
|
| 44863 |
],
|
| 44864 |
"logging_steps": 1,
|
|
@@ -44878,7 +45638,7 @@
|
|
| 44878 |
"attributes": {}
|
| 44879 |
}
|
| 44880 |
},
|
| 44881 |
-
"total_flos": 1.
|
| 44882 |
"train_batch_size": 8,
|
| 44883 |
"trial_name": null,
|
| 44884 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.8852821836960532,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 6000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 44859 |
"eval_samples_per_second": 5.928,
|
| 44860 |
"eval_steps_per_second": 0.204,
|
| 44861 |
"step": 5900
|
| 44862 |
+
},
|
| 44863 |
+
{
|
| 44864 |
+
"epoch": 0.8706750276650682,
|
| 44865 |
+
"grad_norm": 4.275539398193359,
|
| 44866 |
+
"learning_rate": 1.000873989914234e-06,
|
| 44867 |
+
"loss": 0.0684,
|
| 44868 |
+
"step": 5901
|
| 44869 |
+
},
|
| 44870 |
+
{
|
| 44871 |
+
"epoch": 0.8708225746956842,
|
| 44872 |
+
"grad_norm": 3.274839162826538,
|
| 44873 |
+
"learning_rate": 9.98628987339134e-07,
|
| 44874 |
+
"loss": 0.0949,
|
| 44875 |
+
"step": 5902
|
| 44876 |
+
},
|
| 44877 |
+
{
|
| 44878 |
+
"epoch": 0.8709701217263003,
|
| 44879 |
+
"grad_norm": 2.307234764099121,
|
| 44880 |
+
"learning_rate": 9.963863730764222e-07,
|
| 44881 |
+
"loss": 0.0624,
|
| 44882 |
+
"step": 5903
|
| 44883 |
+
},
|
| 44884 |
+
{
|
| 44885 |
+
"epoch": 0.8711176687569163,
|
| 44886 |
+
"grad_norm": 2.237243413925171,
|
| 44887 |
+
"learning_rate": 9.941461477211301e-07,
|
| 44888 |
+
"loss": 0.086,
|
| 44889 |
+
"step": 5904
|
| 44890 |
+
},
|
| 44891 |
+
{
|
| 44892 |
+
"epoch": 0.8712652157875322,
|
| 44893 |
+
"grad_norm": 2.7823374271392822,
|
| 44894 |
+
"learning_rate": 9.919083118676465e-07,
|
| 44895 |
+
"loss": 0.0639,
|
| 44896 |
+
"step": 5905
|
| 44897 |
+
},
|
| 44898 |
+
{
|
| 44899 |
+
"epoch": 0.8714127628181483,
|
| 44900 |
+
"grad_norm": 3.3530755043029785,
|
| 44901 |
+
"learning_rate": 9.896728661097332e-07,
|
| 44902 |
+
"loss": 0.0129,
|
| 44903 |
+
"step": 5906
|
| 44904 |
+
},
|
| 44905 |
+
{
|
| 44906 |
+
"epoch": 0.8715603098487643,
|
| 44907 |
+
"grad_norm": 1.7607320547103882,
|
| 44908 |
+
"learning_rate": 9.874398110405182e-07,
|
| 44909 |
+
"loss": 0.044,
|
| 44910 |
+
"step": 5907
|
| 44911 |
+
},
|
| 44912 |
+
{
|
| 44913 |
+
"epoch": 0.8717078568793803,
|
| 44914 |
+
"grad_norm": 1.0484280586242676,
|
| 44915 |
+
"learning_rate": 9.852091472524882e-07,
|
| 44916 |
+
"loss": 0.0245,
|
| 44917 |
+
"step": 5908
|
| 44918 |
+
},
|
| 44919 |
+
{
|
| 44920 |
+
"epoch": 0.8718554039099963,
|
| 44921 |
+
"grad_norm": 6.2049055099487305,
|
| 44922 |
+
"learning_rate": 9.829808753375046e-07,
|
| 44923 |
+
"loss": 0.1017,
|
| 44924 |
+
"step": 5909
|
| 44925 |
+
},
|
| 44926 |
+
{
|
| 44927 |
+
"epoch": 0.8720029509406123,
|
| 44928 |
+
"grad_norm": 2.4204776287078857,
|
| 44929 |
+
"learning_rate": 9.807549958867856e-07,
|
| 44930 |
+
"loss": 0.0652,
|
| 44931 |
+
"step": 5910
|
| 44932 |
+
},
|
| 44933 |
+
{
|
| 44934 |
+
"epoch": 0.8721504979712283,
|
| 44935 |
+
"grad_norm": 3.092439651489258,
|
| 44936 |
+
"learning_rate": 9.785315094909188e-07,
|
| 44937 |
+
"loss": 0.1103,
|
| 44938 |
+
"step": 5911
|
| 44939 |
+
},
|
| 44940 |
+
{
|
| 44941 |
+
"epoch": 0.8722980450018444,
|
| 44942 |
+
"grad_norm": 3.280195474624634,
|
| 44943 |
+
"learning_rate": 9.763104167398608e-07,
|
| 44944 |
+
"loss": 0.0362,
|
| 44945 |
+
"step": 5912
|
| 44946 |
+
},
|
| 44947 |
+
{
|
| 44948 |
+
"epoch": 0.8724455920324603,
|
| 44949 |
+
"grad_norm": 1.6988095045089722,
|
| 44950 |
+
"learning_rate": 9.740917182229248e-07,
|
| 44951 |
+
"loss": 0.046,
|
| 44952 |
+
"step": 5913
|
| 44953 |
+
},
|
| 44954 |
+
{
|
| 44955 |
+
"epoch": 0.8725931390630763,
|
| 44956 |
+
"grad_norm": 0.9181917905807495,
|
| 44957 |
+
"learning_rate": 9.718754145287922e-07,
|
| 44958 |
+
"loss": 0.0319,
|
| 44959 |
+
"step": 5914
|
| 44960 |
+
},
|
| 44961 |
+
{
|
| 44962 |
+
"epoch": 0.8727406860936924,
|
| 44963 |
+
"grad_norm": 1.576512336730957,
|
| 44964 |
+
"learning_rate": 9.696615062455118e-07,
|
| 44965 |
+
"loss": 0.0274,
|
| 44966 |
+
"step": 5915
|
| 44967 |
+
},
|
| 44968 |
+
{
|
| 44969 |
+
"epoch": 0.8728882331243084,
|
| 44970 |
+
"grad_norm": 1.6942555904388428,
|
| 44971 |
+
"learning_rate": 9.674499939604964e-07,
|
| 44972 |
+
"loss": 0.0168,
|
| 44973 |
+
"step": 5916
|
| 44974 |
+
},
|
| 44975 |
+
{
|
| 44976 |
+
"epoch": 0.8730357801549243,
|
| 44977 |
+
"grad_norm": 1.1324032545089722,
|
| 44978 |
+
"learning_rate": 9.652408782605161e-07,
|
| 44979 |
+
"loss": 0.0333,
|
| 44980 |
+
"step": 5917
|
| 44981 |
+
},
|
| 44982 |
+
{
|
| 44983 |
+
"epoch": 0.8731833271855404,
|
| 44984 |
+
"grad_norm": 1.4638354778289795,
|
| 44985 |
+
"learning_rate": 9.63034159731715e-07,
|
| 44986 |
+
"loss": 0.0123,
|
| 44987 |
+
"step": 5918
|
| 44988 |
+
},
|
| 44989 |
+
{
|
| 44990 |
+
"epoch": 0.8733308742161564,
|
| 44991 |
+
"grad_norm": 3.813880681991577,
|
| 44992 |
+
"learning_rate": 9.608298389595926e-07,
|
| 44993 |
+
"loss": 0.0558,
|
| 44994 |
+
"step": 5919
|
| 44995 |
+
},
|
| 44996 |
+
{
|
| 44997 |
+
"epoch": 0.8734784212467724,
|
| 44998 |
+
"grad_norm": 2.4182288646698,
|
| 44999 |
+
"learning_rate": 9.586279165290192e-07,
|
| 45000 |
+
"loss": 0.1096,
|
| 45001 |
+
"step": 5920
|
| 45002 |
+
},
|
| 45003 |
+
{
|
| 45004 |
+
"epoch": 0.8734784212467724,
|
| 45005 |
+
"eval_accuracy": 0.9782923299565847,
|
| 45006 |
+
"eval_f1": 0.9629629629629629,
|
| 45007 |
+
"eval_loss": 0.05622292309999466,
|
| 45008 |
+
"eval_precision": 0.9798994974874372,
|
| 45009 |
+
"eval_recall": 0.9466019417475728,
|
| 45010 |
+
"eval_runtime": 49.8826,
|
| 45011 |
+
"eval_samples_per_second": 5.834,
|
| 45012 |
+
"eval_steps_per_second": 0.2,
|
| 45013 |
+
"step": 5920
|
| 45014 |
+
},
|
| 45015 |
+
{
|
| 45016 |
+
"epoch": 0.8736259682773884,
|
| 45017 |
+
"grad_norm": 1.3478705883026123,
|
| 45018 |
+
"learning_rate": 9.564283930242258e-07,
|
| 45019 |
+
"loss": 0.033,
|
| 45020 |
+
"step": 5921
|
| 45021 |
+
},
|
| 45022 |
+
{
|
| 45023 |
+
"epoch": 0.8737735153080044,
|
| 45024 |
+
"grad_norm": 2.0680789947509766,
|
| 45025 |
+
"learning_rate": 9.542312690288035e-07,
|
| 45026 |
+
"loss": 0.0784,
|
| 45027 |
+
"step": 5922
|
| 45028 |
+
},
|
| 45029 |
+
{
|
| 45030 |
+
"epoch": 0.8739210623386204,
|
| 45031 |
+
"grad_norm": 3.976668357849121,
|
| 45032 |
+
"learning_rate": 9.52036545125714e-07,
|
| 45033 |
+
"loss": 0.1268,
|
| 45034 |
+
"step": 5923
|
| 45035 |
+
},
|
| 45036 |
+
{
|
| 45037 |
+
"epoch": 0.8740686093692365,
|
| 45038 |
+
"grad_norm": 2.448589563369751,
|
| 45039 |
+
"learning_rate": 9.498442218972748e-07,
|
| 45040 |
+
"loss": 0.0588,
|
| 45041 |
+
"step": 5924
|
| 45042 |
+
},
|
| 45043 |
+
{
|
| 45044 |
+
"epoch": 0.8742161563998525,
|
| 45045 |
+
"grad_norm": 1.7691428661346436,
|
| 45046 |
+
"learning_rate": 9.476542999251714e-07,
|
| 45047 |
+
"loss": 0.0443,
|
| 45048 |
+
"step": 5925
|
| 45049 |
+
},
|
| 45050 |
+
{
|
| 45051 |
+
"epoch": 0.8743637034304684,
|
| 45052 |
+
"grad_norm": 2.7442705631256104,
|
| 45053 |
+
"learning_rate": 9.454667797904515e-07,
|
| 45054 |
+
"loss": 0.0751,
|
| 45055 |
+
"step": 5926
|
| 45056 |
+
},
|
| 45057 |
+
{
|
| 45058 |
+
"epoch": 0.8745112504610845,
|
| 45059 |
+
"grad_norm": 3.767246723175049,
|
| 45060 |
+
"learning_rate": 9.432816620735242e-07,
|
| 45061 |
+
"loss": 0.0461,
|
| 45062 |
+
"step": 5927
|
| 45063 |
+
},
|
| 45064 |
+
{
|
| 45065 |
+
"epoch": 0.8746587974917005,
|
| 45066 |
+
"grad_norm": 2.4902091026306152,
|
| 45067 |
+
"learning_rate": 9.410989473541587e-07,
|
| 45068 |
+
"loss": 0.0497,
|
| 45069 |
+
"step": 5928
|
| 45070 |
+
},
|
| 45071 |
+
{
|
| 45072 |
+
"epoch": 0.8748063445223165,
|
| 45073 |
+
"grad_norm": 4.763408184051514,
|
| 45074 |
+
"learning_rate": 9.389186362114921e-07,
|
| 45075 |
+
"loss": 0.0801,
|
| 45076 |
+
"step": 5929
|
| 45077 |
+
},
|
| 45078 |
+
{
|
| 45079 |
+
"epoch": 0.8749538915529325,
|
| 45080 |
+
"grad_norm": 2.5694327354431152,
|
| 45081 |
+
"learning_rate": 9.367407292240228e-07,
|
| 45082 |
+
"loss": 0.0466,
|
| 45083 |
+
"step": 5930
|
| 45084 |
+
},
|
| 45085 |
+
{
|
| 45086 |
+
"epoch": 0.8751014385835485,
|
| 45087 |
+
"grad_norm": 1.3149958848953247,
|
| 45088 |
+
"learning_rate": 9.345652269696059e-07,
|
| 45089 |
+
"loss": 0.0415,
|
| 45090 |
+
"step": 5931
|
| 45091 |
+
},
|
| 45092 |
+
{
|
| 45093 |
+
"epoch": 0.8752489856141645,
|
| 45094 |
+
"grad_norm": 2.9025168418884277,
|
| 45095 |
+
"learning_rate": 9.323921300254657e-07,
|
| 45096 |
+
"loss": 0.0622,
|
| 45097 |
+
"step": 5932
|
| 45098 |
+
},
|
| 45099 |
+
{
|
| 45100 |
+
"epoch": 0.8753965326447806,
|
| 45101 |
+
"grad_norm": 0.823527455329895,
|
| 45102 |
+
"learning_rate": 9.302214389681807e-07,
|
| 45103 |
+
"loss": 0.0141,
|
| 45104 |
+
"step": 5933
|
| 45105 |
+
},
|
| 45106 |
+
{
|
| 45107 |
+
"epoch": 0.8755440796753965,
|
| 45108 |
+
"grad_norm": 2.63572359085083,
|
| 45109 |
+
"learning_rate": 9.280531543736982e-07,
|
| 45110 |
+
"loss": 0.0447,
|
| 45111 |
+
"step": 5934
|
| 45112 |
+
},
|
| 45113 |
+
{
|
| 45114 |
+
"epoch": 0.8756916267060125,
|
| 45115 |
+
"grad_norm": 2.119584560394287,
|
| 45116 |
+
"learning_rate": 9.258872768173255e-07,
|
| 45117 |
+
"loss": 0.0279,
|
| 45118 |
+
"step": 5935
|
| 45119 |
+
},
|
| 45120 |
+
{
|
| 45121 |
+
"epoch": 0.8758391737366286,
|
| 45122 |
+
"grad_norm": 2.0648109912872314,
|
| 45123 |
+
"learning_rate": 9.237238068737265e-07,
|
| 45124 |
+
"loss": 0.0517,
|
| 45125 |
+
"step": 5936
|
| 45126 |
+
},
|
| 45127 |
+
{
|
| 45128 |
+
"epoch": 0.8759867207672446,
|
| 45129 |
+
"grad_norm": 1.330884337425232,
|
| 45130 |
+
"learning_rate": 9.215627451169318e-07,
|
| 45131 |
+
"loss": 0.0191,
|
| 45132 |
+
"step": 5937
|
| 45133 |
+
},
|
| 45134 |
+
{
|
| 45135 |
+
"epoch": 0.8761342677978605,
|
| 45136 |
+
"grad_norm": 6.870659351348877,
|
| 45137 |
+
"learning_rate": 9.194040921203284e-07,
|
| 45138 |
+
"loss": 0.106,
|
| 45139 |
+
"step": 5938
|
| 45140 |
+
},
|
| 45141 |
+
{
|
| 45142 |
+
"epoch": 0.8762818148284766,
|
| 45143 |
+
"grad_norm": 3.1247828006744385,
|
| 45144 |
+
"learning_rate": 9.172478484566671e-07,
|
| 45145 |
+
"loss": 0.0726,
|
| 45146 |
+
"step": 5939
|
| 45147 |
+
},
|
| 45148 |
+
{
|
| 45149 |
+
"epoch": 0.8764293618590926,
|
| 45150 |
+
"grad_norm": 3.2152442932128906,
|
| 45151 |
+
"learning_rate": 9.150940146980624e-07,
|
| 45152 |
+
"loss": 0.0933,
|
| 45153 |
+
"step": 5940
|
| 45154 |
+
},
|
| 45155 |
+
{
|
| 45156 |
+
"epoch": 0.8764293618590926,
|
| 45157 |
+
"eval_accuracy": 0.9782923299565847,
|
| 45158 |
+
"eval_f1": 0.9629629629629629,
|
| 45159 |
+
"eval_loss": 0.05513066053390503,
|
| 45160 |
+
"eval_precision": 0.9798994974874372,
|
| 45161 |
+
"eval_recall": 0.9466019417475728,
|
| 45162 |
+
"eval_runtime": 49.8287,
|
| 45163 |
+
"eval_samples_per_second": 5.84,
|
| 45164 |
+
"eval_steps_per_second": 0.201,
|
| 45165 |
+
"step": 5940
|
| 45166 |
+
},
|
| 45167 |
+
{
|
| 45168 |
+
"epoch": 0.8765769088897086,
|
| 45169 |
+
"grad_norm": 3.2563045024871826,
|
| 45170 |
+
"learning_rate": 9.129425914159839e-07,
|
| 45171 |
+
"loss": 0.0574,
|
| 45172 |
+
"step": 5941
|
| 45173 |
+
},
|
| 45174 |
+
{
|
| 45175 |
+
"epoch": 0.8767244559203246,
|
| 45176 |
+
"grad_norm": 2.5582735538482666,
|
| 45177 |
+
"learning_rate": 9.107935791812605e-07,
|
| 45178 |
+
"loss": 0.0449,
|
| 45179 |
+
"step": 5942
|
| 45180 |
+
},
|
| 45181 |
+
{
|
| 45182 |
+
"epoch": 0.8768720029509406,
|
| 45183 |
+
"grad_norm": 1.2111361026763916,
|
| 45184 |
+
"learning_rate": 9.086469785640862e-07,
|
| 45185 |
+
"loss": 0.0268,
|
| 45186 |
+
"step": 5943
|
| 45187 |
+
},
|
| 45188 |
+
{
|
| 45189 |
+
"epoch": 0.8770195499815566,
|
| 45190 |
+
"grad_norm": 2.597418785095215,
|
| 45191 |
+
"learning_rate": 9.065027901340173e-07,
|
| 45192 |
+
"loss": 0.067,
|
| 45193 |
+
"step": 5944
|
| 45194 |
+
},
|
| 45195 |
+
{
|
| 45196 |
+
"epoch": 0.8771670970121727,
|
| 45197 |
+
"grad_norm": 1.3513870239257812,
|
| 45198 |
+
"learning_rate": 9.043610144599612e-07,
|
| 45199 |
+
"loss": 0.0342,
|
| 45200 |
+
"step": 5945
|
| 45201 |
+
},
|
| 45202 |
+
{
|
| 45203 |
+
"epoch": 0.8773146440427887,
|
| 45204 |
+
"grad_norm": 1.4286096096038818,
|
| 45205 |
+
"learning_rate": 9.022216521101934e-07,
|
| 45206 |
+
"loss": 0.0356,
|
| 45207 |
+
"step": 5946
|
| 45208 |
+
},
|
| 45209 |
+
{
|
| 45210 |
+
"epoch": 0.8774621910734046,
|
| 45211 |
+
"grad_norm": 2.183363437652588,
|
| 45212 |
+
"learning_rate": 9.00084703652343e-07,
|
| 45213 |
+
"loss": 0.0446,
|
| 45214 |
+
"step": 5947
|
| 45215 |
+
},
|
| 45216 |
+
{
|
| 45217 |
+
"epoch": 0.8776097381040207,
|
| 45218 |
+
"grad_norm": 3.5890183448791504,
|
| 45219 |
+
"learning_rate": 8.979501696534032e-07,
|
| 45220 |
+
"loss": 0.0908,
|
| 45221 |
+
"step": 5948
|
| 45222 |
+
},
|
| 45223 |
+
{
|
| 45224 |
+
"epoch": 0.8777572851346367,
|
| 45225 |
+
"grad_norm": 1.664736270904541,
|
| 45226 |
+
"learning_rate": 8.958180506797265e-07,
|
| 45227 |
+
"loss": 0.0466,
|
| 45228 |
+
"step": 5949
|
| 45229 |
+
},
|
| 45230 |
+
{
|
| 45231 |
+
"epoch": 0.8779048321652527,
|
| 45232 |
+
"grad_norm": 3.184309244155884,
|
| 45233 |
+
"learning_rate": 8.936883472970193e-07,
|
| 45234 |
+
"loss": 0.0774,
|
| 45235 |
+
"step": 5950
|
| 45236 |
+
},
|
| 45237 |
+
{
|
| 45238 |
+
"epoch": 0.8780523791958686,
|
| 45239 |
+
"grad_norm": 2.4639813899993896,
|
| 45240 |
+
"learning_rate": 8.915610600703539e-07,
|
| 45241 |
+
"loss": 0.0793,
|
| 45242 |
+
"step": 5951
|
| 45243 |
+
},
|
| 45244 |
+
{
|
| 45245 |
+
"epoch": 0.8781999262264847,
|
| 45246 |
+
"grad_norm": 2.775432825088501,
|
| 45247 |
+
"learning_rate": 8.894361895641568e-07,
|
| 45248 |
+
"loss": 0.0637,
|
| 45249 |
+
"step": 5952
|
| 45250 |
+
},
|
| 45251 |
+
{
|
| 45252 |
+
"epoch": 0.8783474732571007,
|
| 45253 |
+
"grad_norm": 3.227356195449829,
|
| 45254 |
+
"learning_rate": 8.873137363422125e-07,
|
| 45255 |
+
"loss": 0.0733,
|
| 45256 |
+
"step": 5953
|
| 45257 |
+
},
|
| 45258 |
+
{
|
| 45259 |
+
"epoch": 0.8784950202877168,
|
| 45260 |
+
"grad_norm": 1.4808876514434814,
|
| 45261 |
+
"learning_rate": 8.851937009676714e-07,
|
| 45262 |
+
"loss": 0.0535,
|
| 45263 |
+
"step": 5954
|
| 45264 |
+
},
|
| 45265 |
+
{
|
| 45266 |
+
"epoch": 0.8786425673183327,
|
| 45267 |
+
"grad_norm": 2.2464683055877686,
|
| 45268 |
+
"learning_rate": 8.830760840030361e-07,
|
| 45269 |
+
"loss": 0.049,
|
| 45270 |
+
"step": 5955
|
| 45271 |
+
},
|
| 45272 |
+
{
|
| 45273 |
+
"epoch": 0.8787901143489487,
|
| 45274 |
+
"grad_norm": 1.7445260286331177,
|
| 45275 |
+
"learning_rate": 8.80960886010166e-07,
|
| 45276 |
+
"loss": 0.0515,
|
| 45277 |
+
"step": 5956
|
| 45278 |
+
},
|
| 45279 |
+
{
|
| 45280 |
+
"epoch": 0.8789376613795647,
|
| 45281 |
+
"grad_norm": 8.007856369018555,
|
| 45282 |
+
"learning_rate": 8.788481075502831e-07,
|
| 45283 |
+
"loss": 0.0436,
|
| 45284 |
+
"step": 5957
|
| 45285 |
+
},
|
| 45286 |
+
{
|
| 45287 |
+
"epoch": 0.8790852084101808,
|
| 45288 |
+
"grad_norm": 1.341110110282898,
|
| 45289 |
+
"learning_rate": 8.76737749183968e-07,
|
| 45290 |
+
"loss": 0.0147,
|
| 45291 |
+
"step": 5958
|
| 45292 |
+
},
|
| 45293 |
+
{
|
| 45294 |
+
"epoch": 0.8792327554407967,
|
| 45295 |
+
"grad_norm": 1.3692198991775513,
|
| 45296 |
+
"learning_rate": 8.746298114711538e-07,
|
| 45297 |
+
"loss": 0.0286,
|
| 45298 |
+
"step": 5959
|
| 45299 |
+
},
|
| 45300 |
+
{
|
| 45301 |
+
"epoch": 0.8793803024714127,
|
| 45302 |
+
"grad_norm": 2.7240824699401855,
|
| 45303 |
+
"learning_rate": 8.725242949711376e-07,
|
| 45304 |
+
"loss": 0.0482,
|
| 45305 |
+
"step": 5960
|
| 45306 |
+
},
|
| 45307 |
+
{
|
| 45308 |
+
"epoch": 0.8793803024714127,
|
| 45309 |
+
"eval_accuracy": 0.9782923299565847,
|
| 45310 |
+
"eval_f1": 0.9629629629629629,
|
| 45311 |
+
"eval_loss": 0.055228136479854584,
|
| 45312 |
+
"eval_precision": 0.9798994974874372,
|
| 45313 |
+
"eval_recall": 0.9466019417475728,
|
| 45314 |
+
"eval_runtime": 50.0963,
|
| 45315 |
+
"eval_samples_per_second": 5.809,
|
| 45316 |
+
"eval_steps_per_second": 0.2,
|
| 45317 |
+
"step": 5960
|
| 45318 |
+
},
|
| 45319 |
+
{
|
| 45320 |
+
"epoch": 0.8795278495020288,
|
| 45321 |
+
"grad_norm": 1.6086735725402832,
|
| 45322 |
+
"learning_rate": 8.704212002425683e-07,
|
| 45323 |
+
"loss": 0.051,
|
| 45324 |
+
"step": 5961
|
| 45325 |
+
},
|
| 45326 |
+
{
|
| 45327 |
+
"epoch": 0.8796753965326448,
|
| 45328 |
+
"grad_norm": 2.4951272010803223,
|
| 45329 |
+
"learning_rate": 8.683205278434559e-07,
|
| 45330 |
+
"loss": 0.0779,
|
| 45331 |
+
"step": 5962
|
| 45332 |
+
},
|
| 45333 |
+
{
|
| 45334 |
+
"epoch": 0.8798229435632607,
|
| 45335 |
+
"grad_norm": 2.1152498722076416,
|
| 45336 |
+
"learning_rate": 8.662222783311691e-07,
|
| 45337 |
+
"loss": 0.0203,
|
| 45338 |
+
"step": 5963
|
| 45339 |
+
},
|
| 45340 |
+
{
|
| 45341 |
+
"epoch": 0.8799704905938768,
|
| 45342 |
+
"grad_norm": 2.3825652599334717,
|
| 45343 |
+
"learning_rate": 8.641264522624282e-07,
|
| 45344 |
+
"loss": 0.0648,
|
| 45345 |
+
"step": 5964
|
| 45346 |
+
},
|
| 45347 |
+
{
|
| 45348 |
+
"epoch": 0.8801180376244928,
|
| 45349 |
+
"grad_norm": 1.6257972717285156,
|
| 45350 |
+
"learning_rate": 8.620330501933161e-07,
|
| 45351 |
+
"loss": 0.0628,
|
| 45352 |
+
"step": 5965
|
| 45353 |
+
},
|
| 45354 |
+
{
|
| 45355 |
+
"epoch": 0.8802655846551088,
|
| 45356 |
+
"grad_norm": 0.8832866549491882,
|
| 45357 |
+
"learning_rate": 8.599420726792696e-07,
|
| 45358 |
+
"loss": 0.0181,
|
| 45359 |
+
"step": 5966
|
| 45360 |
+
},
|
| 45361 |
+
{
|
| 45362 |
+
"epoch": 0.8804131316857248,
|
| 45363 |
+
"grad_norm": 3.3614399433135986,
|
| 45364 |
+
"learning_rate": 8.578535202750793e-07,
|
| 45365 |
+
"loss": 0.0355,
|
| 45366 |
+
"step": 5967
|
| 45367 |
+
},
|
| 45368 |
+
{
|
| 45369 |
+
"epoch": 0.8805606787163408,
|
| 45370 |
+
"grad_norm": 1.1095460653305054,
|
| 45371 |
+
"learning_rate": 8.557673935349021e-07,
|
| 45372 |
+
"loss": 0.0147,
|
| 45373 |
+
"step": 5968
|
| 45374 |
+
},
|
| 45375 |
+
{
|
| 45376 |
+
"epoch": 0.8807082257469568,
|
| 45377 |
+
"grad_norm": 2.085298538208008,
|
| 45378 |
+
"learning_rate": 8.536836930122416e-07,
|
| 45379 |
+
"loss": 0.0692,
|
| 45380 |
+
"step": 5969
|
| 45381 |
+
},
|
| 45382 |
+
{
|
| 45383 |
+
"epoch": 0.8808557727775729,
|
| 45384 |
+
"grad_norm": 1.3290832042694092,
|
| 45385 |
+
"learning_rate": 8.516024192599604e-07,
|
| 45386 |
+
"loss": 0.0471,
|
| 45387 |
+
"step": 5970
|
| 45388 |
+
},
|
| 45389 |
+
{
|
| 45390 |
+
"epoch": 0.8810033198081889,
|
| 45391 |
+
"grad_norm": 1.9308030605316162,
|
| 45392 |
+
"learning_rate": 8.495235728302809e-07,
|
| 45393 |
+
"loss": 0.0326,
|
| 45394 |
+
"step": 5971
|
| 45395 |
+
},
|
| 45396 |
+
{
|
| 45397 |
+
"epoch": 0.8811508668388048,
|
| 45398 |
+
"grad_norm": 3.052764654159546,
|
| 45399 |
+
"learning_rate": 8.474471542747742e-07,
|
| 45400 |
+
"loss": 0.0581,
|
| 45401 |
+
"step": 5972
|
| 45402 |
+
},
|
| 45403 |
+
{
|
| 45404 |
+
"epoch": 0.8812984138694209,
|
| 45405 |
+
"grad_norm": 1.6666488647460938,
|
| 45406 |
+
"learning_rate": 8.453731641443741e-07,
|
| 45407 |
+
"loss": 0.0506,
|
| 45408 |
+
"step": 5973
|
| 45409 |
+
},
|
| 45410 |
+
{
|
| 45411 |
+
"epoch": 0.8814459609000369,
|
| 45412 |
+
"grad_norm": 3.605884075164795,
|
| 45413 |
+
"learning_rate": 8.433016029893692e-07,
|
| 45414 |
+
"loss": 0.0608,
|
| 45415 |
+
"step": 5974
|
| 45416 |
+
},
|
| 45417 |
+
{
|
| 45418 |
+
"epoch": 0.8815935079306529,
|
| 45419 |
+
"grad_norm": 2.5897908210754395,
|
| 45420 |
+
"learning_rate": 8.412324713593978e-07,
|
| 45421 |
+
"loss": 0.0588,
|
| 45422 |
+
"step": 5975
|
| 45423 |
+
},
|
| 45424 |
+
{
|
| 45425 |
+
"epoch": 0.8817410549612689,
|
| 45426 |
+
"grad_norm": 0.7357593774795532,
|
| 45427 |
+
"learning_rate": 8.391657698034616e-07,
|
| 45428 |
+
"loss": 0.0121,
|
| 45429 |
+
"step": 5976
|
| 45430 |
+
},
|
| 45431 |
+
{
|
| 45432 |
+
"epoch": 0.8818886019918849,
|
| 45433 |
+
"grad_norm": 1.547512173652649,
|
| 45434 |
+
"learning_rate": 8.3710149886991e-07,
|
| 45435 |
+
"loss": 0.0598,
|
| 45436 |
+
"step": 5977
|
| 45437 |
+
},
|
| 45438 |
+
{
|
| 45439 |
+
"epoch": 0.8820361490225009,
|
| 45440 |
+
"grad_norm": 1.7373154163360596,
|
| 45441 |
+
"learning_rate": 8.350396591064535e-07,
|
| 45442 |
+
"loss": 0.0567,
|
| 45443 |
+
"step": 5978
|
| 45444 |
+
},
|
| 45445 |
+
{
|
| 45446 |
+
"epoch": 0.882183696053117,
|
| 45447 |
+
"grad_norm": 2.9452950954437256,
|
| 45448 |
+
"learning_rate": 8.329802510601559e-07,
|
| 45449 |
+
"loss": 0.0536,
|
| 45450 |
+
"step": 5979
|
| 45451 |
+
},
|
| 45452 |
+
{
|
| 45453 |
+
"epoch": 0.8823312430837329,
|
| 45454 |
+
"grad_norm": 4.783194065093994,
|
| 45455 |
+
"learning_rate": 8.309232752774343e-07,
|
| 45456 |
+
"loss": 0.1723,
|
| 45457 |
+
"step": 5980
|
| 45458 |
+
},
|
| 45459 |
+
{
|
| 45460 |
+
"epoch": 0.8823312430837329,
|
| 45461 |
+
"eval_accuracy": 0.9782923299565847,
|
| 45462 |
+
"eval_f1": 0.9629629629629629,
|
| 45463 |
+
"eval_loss": 0.055726367980241776,
|
| 45464 |
+
"eval_precision": 0.9798994974874372,
|
| 45465 |
+
"eval_recall": 0.9466019417475728,
|
| 45466 |
+
"eval_runtime": 50.9561,
|
| 45467 |
+
"eval_samples_per_second": 5.711,
|
| 45468 |
+
"eval_steps_per_second": 0.196,
|
| 45469 |
+
"step": 5980
|
| 45470 |
+
},
|
| 45471 |
+
{
|
| 45472 |
+
"epoch": 0.8824787901143489,
|
| 45473 |
+
"grad_norm": 2.6260926723480225,
|
| 45474 |
+
"learning_rate": 8.288687323040568e-07,
|
| 45475 |
+
"loss": 0.0891,
|
| 45476 |
+
"step": 5981
|
| 45477 |
+
},
|
| 45478 |
+
{
|
| 45479 |
+
"epoch": 0.882626337144965,
|
| 45480 |
+
"grad_norm": 2.6471948623657227,
|
| 45481 |
+
"learning_rate": 8.26816622685157e-07,
|
| 45482 |
+
"loss": 0.0756,
|
| 45483 |
+
"step": 5982
|
| 45484 |
+
},
|
| 45485 |
+
{
|
| 45486 |
+
"epoch": 0.882773884175581,
|
| 45487 |
+
"grad_norm": 3.824842691421509,
|
| 45488 |
+
"learning_rate": 8.247669469652142e-07,
|
| 45489 |
+
"loss": 0.1009,
|
| 45490 |
+
"step": 5983
|
| 45491 |
+
},
|
| 45492 |
+
{
|
| 45493 |
+
"epoch": 0.8829214312061969,
|
| 45494 |
+
"grad_norm": 1.891882061958313,
|
| 45495 |
+
"learning_rate": 8.227197056880609e-07,
|
| 45496 |
+
"loss": 0.0769,
|
| 45497 |
+
"step": 5984
|
| 45498 |
+
},
|
| 45499 |
+
{
|
| 45500 |
+
"epoch": 0.883068978236813,
|
| 45501 |
+
"grad_norm": 2.1029231548309326,
|
| 45502 |
+
"learning_rate": 8.206748993968916e-07,
|
| 45503 |
+
"loss": 0.0392,
|
| 45504 |
+
"step": 5985
|
| 45505 |
+
},
|
| 45506 |
+
{
|
| 45507 |
+
"epoch": 0.883216525267429,
|
| 45508 |
+
"grad_norm": 1.9693273305892944,
|
| 45509 |
+
"learning_rate": 8.186325286342456e-07,
|
| 45510 |
+
"loss": 0.055,
|
| 45511 |
+
"step": 5986
|
| 45512 |
+
},
|
| 45513 |
+
{
|
| 45514 |
+
"epoch": 0.883364072298045,
|
| 45515 |
+
"grad_norm": 1.6374262571334839,
|
| 45516 |
+
"learning_rate": 8.165925939420227e-07,
|
| 45517 |
+
"loss": 0.0462,
|
| 45518 |
+
"step": 5987
|
| 45519 |
+
},
|
| 45520 |
+
{
|
| 45521 |
+
"epoch": 0.883511619328661,
|
| 45522 |
+
"grad_norm": 2.6680667400360107,
|
| 45523 |
+
"learning_rate": 8.145550958614745e-07,
|
| 45524 |
+
"loss": 0.086,
|
| 45525 |
+
"step": 5988
|
| 45526 |
+
},
|
| 45527 |
+
{
|
| 45528 |
+
"epoch": 0.883659166359277,
|
| 45529 |
+
"grad_norm": 2.1669082641601562,
|
| 45530 |
+
"learning_rate": 8.12520034933203e-07,
|
| 45531 |
+
"loss": 0.0585,
|
| 45532 |
+
"step": 5989
|
| 45533 |
+
},
|
| 45534 |
+
{
|
| 45535 |
+
"epoch": 0.883806713389893,
|
| 45536 |
+
"grad_norm": 1.926206350326538,
|
| 45537 |
+
"learning_rate": 8.104874116971683e-07,
|
| 45538 |
+
"loss": 0.0491,
|
| 45539 |
+
"step": 5990
|
| 45540 |
+
},
|
| 45541 |
+
{
|
| 45542 |
+
"epoch": 0.8839542604205091,
|
| 45543 |
+
"grad_norm": 1.9720089435577393,
|
| 45544 |
+
"learning_rate": 8.084572266926805e-07,
|
| 45545 |
+
"loss": 0.0514,
|
| 45546 |
+
"step": 5991
|
| 45547 |
+
},
|
| 45548 |
+
{
|
| 45549 |
+
"epoch": 0.8841018074511251,
|
| 45550 |
+
"grad_norm": 0.9564663767814636,
|
| 45551 |
+
"learning_rate": 8.064294804584027e-07,
|
| 45552 |
+
"loss": 0.041,
|
| 45553 |
+
"step": 5992
|
| 45554 |
+
},
|
| 45555 |
+
{
|
| 45556 |
+
"epoch": 0.884249354481741,
|
| 45557 |
+
"grad_norm": 2.2758212089538574,
|
| 45558 |
+
"learning_rate": 8.044041735323549e-07,
|
| 45559 |
+
"loss": 0.0473,
|
| 45560 |
+
"step": 5993
|
| 45561 |
+
},
|
| 45562 |
+
{
|
| 45563 |
+
"epoch": 0.8843969015123571,
|
| 45564 |
+
"grad_norm": 2.2937676906585693,
|
| 45565 |
+
"learning_rate": 8.023813064519037e-07,
|
| 45566 |
+
"loss": 0.0378,
|
| 45567 |
+
"step": 5994
|
| 45568 |
+
},
|
| 45569 |
+
{
|
| 45570 |
+
"epoch": 0.8845444485429731,
|
| 45571 |
+
"grad_norm": 4.157854080200195,
|
| 45572 |
+
"learning_rate": 8.003608797537754e-07,
|
| 45573 |
+
"loss": 0.0982,
|
| 45574 |
+
"step": 5995
|
| 45575 |
+
},
|
| 45576 |
+
{
|
| 45577 |
+
"epoch": 0.8846919955735891,
|
| 45578 |
+
"grad_norm": 3.3299248218536377,
|
| 45579 |
+
"learning_rate": 7.983428939740412e-07,
|
| 45580 |
+
"loss": 0.0517,
|
| 45581 |
+
"step": 5996
|
| 45582 |
+
},
|
| 45583 |
+
{
|
| 45584 |
+
"epoch": 0.884839542604205,
|
| 45585 |
+
"grad_norm": 1.161108374595642,
|
| 45586 |
+
"learning_rate": 7.963273496481294e-07,
|
| 45587 |
+
"loss": 0.0139,
|
| 45588 |
+
"step": 5997
|
| 45589 |
+
},
|
| 45590 |
+
{
|
| 45591 |
+
"epoch": 0.8849870896348211,
|
| 45592 |
+
"grad_norm": 2.8756136894226074,
|
| 45593 |
+
"learning_rate": 7.943142473108234e-07,
|
| 45594 |
+
"loss": 0.1444,
|
| 45595 |
+
"step": 5998
|
| 45596 |
+
},
|
| 45597 |
+
{
|
| 45598 |
+
"epoch": 0.8851346366654371,
|
| 45599 |
+
"grad_norm": 0.6647000908851624,
|
| 45600 |
+
"learning_rate": 7.923035874962504e-07,
|
| 45601 |
+
"loss": 0.0129,
|
| 45602 |
+
"step": 5999
|
| 45603 |
+
},
|
| 45604 |
+
{
|
| 45605 |
+
"epoch": 0.8852821836960532,
|
| 45606 |
+
"grad_norm": 1.4540106058120728,
|
| 45607 |
+
"learning_rate": 7.902953707378925e-07,
|
| 45608 |
+
"loss": 0.0259,
|
| 45609 |
+
"step": 6000
|
| 45610 |
+
},
|
| 45611 |
+
{
|
| 45612 |
+
"epoch": 0.8852821836960532,
|
| 45613 |
+
"eval_accuracy": 0.9782923299565847,
|
| 45614 |
+
"eval_f1": 0.9629629629629629,
|
| 45615 |
+
"eval_loss": 0.05572304502129555,
|
| 45616 |
+
"eval_precision": 0.9798994974874372,
|
| 45617 |
+
"eval_recall": 0.9466019417475728,
|
| 45618 |
+
"eval_runtime": 49.6282,
|
| 45619 |
+
"eval_samples_per_second": 5.864,
|
| 45620 |
+
"eval_steps_per_second": 0.201,
|
| 45621 |
+
"step": 6000
|
| 45622 |
}
|
| 45623 |
],
|
| 45624 |
"logging_steps": 1,
|
|
|
|
| 45638 |
"attributes": {}
|
| 45639 |
}
|
| 45640 |
},
|
| 45641 |
+
"total_flos": 1.8481947946526966e+18,
|
| 45642 |
"train_batch_size": 8,
|
| 45643 |
"trial_name": null,
|
| 45644 |
"trial_params": null
|