Training in progress, step 6400, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13934748
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8da094bb58aa315c0dbf65d109a4451df66be453e2431cf1a0c4ecdd9ceebd97
|
| 3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13999412
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96e263ebb8f2a41872ded496e1dc52ad7720376bb634867f9591a3794ab7d3a1
|
| 3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:344a82b08f46e3470679d0297ce3f97fd02b801ccee0da6f53e77cf6d7ea9808
|
| 3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e62e952e34e8732a682ff9a8aa0dfece0ec8b2415897d03feab5a9570104b06d
|
| 3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:948187477401863aff3f049c3c44b0abdb0be5c10934fb97375600a1ce977bb0
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25236c04c8f8da3260ea3459f91081dcddc5d5ea2cbe8eabd6054cce06f92faa
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:640b6e661958d243cae1ea127f269a99af317657fa786eb7dc174d158d645b7d
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1f1752f740240735a873bcf35d461dfe262e32638d88fc837774925e8080436
|
| 3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d98bdde773257c04b20ce747461dedaea858963e3b0ee34044400ab89897a43
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8504bbd2ddff94c982c1681f6db902504da78219fcef7f6818b65d415605b80c
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:196065f493df1d03858eb23e309a1135c0371a98a32ac2a517518646388ed9cf
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b7fed0b4c620538bfc967033b7c4ab483c5214d361a87603ce37022eafee14fa
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60228bdd3999ee852e9677ac091321938441c1f39b0d501df20ea306992b3f39
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -47899,6 +47899,766 @@
|
|
| 47899 |
"eval_samples_per_second": 5.799,
|
| 47900 |
"eval_steps_per_second": 0.199,
|
| 47901 |
"step": 6300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47902 |
}
|
| 47903 |
],
|
| 47904 |
"logging_steps": 1,
|
|
@@ -47918,7 +48678,7 @@
|
|
| 47918 |
"attributes": {}
|
| 47919 |
}
|
| 47920 |
},
|
| 47921 |
-
"total_flos": 1.
|
| 47922 |
"train_batch_size": 8,
|
| 47923 |
"trial_name": null,
|
| 47924 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.9443009959424566,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 6400,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 47899 |
"eval_samples_per_second": 5.799,
|
| 47900 |
"eval_steps_per_second": 0.199,
|
| 47901 |
"step": 6300
|
| 47902 |
+
},
|
| 47903 |
+
{
|
| 47904 |
+
"epoch": 0.9296938399114718,
|
| 47905 |
+
"grad_norm": 3.9481570720672607,
|
| 47906 |
+
"learning_rate": 2.990815310757078e-07,
|
| 47907 |
+
"loss": 0.0607,
|
| 47908 |
+
"step": 6301
|
| 47909 |
+
},
|
| 47910 |
+
{
|
| 47911 |
+
"epoch": 0.9298413869420878,
|
| 47912 |
+
"grad_norm": 3.901336908340454,
|
| 47913 |
+
"learning_rate": 2.978324747844996e-07,
|
| 47914 |
+
"loss": 0.0626,
|
| 47915 |
+
"step": 6302
|
| 47916 |
+
},
|
| 47917 |
+
{
|
| 47918 |
+
"epoch": 0.9299889339727038,
|
| 47919 |
+
"grad_norm": 1.755456566810608,
|
| 47920 |
+
"learning_rate": 2.9658599274635435e-07,
|
| 47921 |
+
"loss": 0.0784,
|
| 47922 |
+
"step": 6303
|
| 47923 |
+
},
|
| 47924 |
+
{
|
| 47925 |
+
"epoch": 0.9301364810033198,
|
| 47926 |
+
"grad_norm": 1.976046085357666,
|
| 47927 |
+
"learning_rate": 2.953420852919997e-07,
|
| 47928 |
+
"loss": 0.0357,
|
| 47929 |
+
"step": 6304
|
| 47930 |
+
},
|
| 47931 |
+
{
|
| 47932 |
+
"epoch": 0.9302840280339358,
|
| 47933 |
+
"grad_norm": 1.2929192781448364,
|
| 47934 |
+
"learning_rate": 2.941007527514772e-07,
|
| 47935 |
+
"loss": 0.0359,
|
| 47936 |
+
"step": 6305
|
| 47937 |
+
},
|
| 47938 |
+
{
|
| 47939 |
+
"epoch": 0.9304315750645519,
|
| 47940 |
+
"grad_norm": 4.491754531860352,
|
| 47941 |
+
"learning_rate": 2.9286199545414675e-07,
|
| 47942 |
+
"loss": 0.0641,
|
| 47943 |
+
"step": 6306
|
| 47944 |
+
},
|
| 47945 |
+
{
|
| 47946 |
+
"epoch": 0.9305791220951678,
|
| 47947 |
+
"grad_norm": 2.579371690750122,
|
| 47948 |
+
"learning_rate": 2.916258137286876e-07,
|
| 47949 |
+
"loss": 0.0456,
|
| 47950 |
+
"step": 6307
|
| 47951 |
+
},
|
| 47952 |
+
{
|
| 47953 |
+
"epoch": 0.9307266691257838,
|
| 47954 |
+
"grad_norm": 3.1688661575317383,
|
| 47955 |
+
"learning_rate": 2.9039220790308965e-07,
|
| 47956 |
+
"loss": 0.0636,
|
| 47957 |
+
"step": 6308
|
| 47958 |
+
},
|
| 47959 |
+
{
|
| 47960 |
+
"epoch": 0.9308742161563999,
|
| 47961 |
+
"grad_norm": 1.6486138105392456,
|
| 47962 |
+
"learning_rate": 2.8916117830466215e-07,
|
| 47963 |
+
"loss": 0.0534,
|
| 47964 |
+
"step": 6309
|
| 47965 |
+
},
|
| 47966 |
+
{
|
| 47967 |
+
"epoch": 0.9310217631870159,
|
| 47968 |
+
"grad_norm": 1.5931568145751953,
|
| 47969 |
+
"learning_rate": 2.8793272526003504e-07,
|
| 47970 |
+
"loss": 0.0462,
|
| 47971 |
+
"step": 6310
|
| 47972 |
+
},
|
| 47973 |
+
{
|
| 47974 |
+
"epoch": 0.9311693102176318,
|
| 47975 |
+
"grad_norm": 2.7471938133239746,
|
| 47976 |
+
"learning_rate": 2.8670684909514854e-07,
|
| 47977 |
+
"loss": 0.0801,
|
| 47978 |
+
"step": 6311
|
| 47979 |
+
},
|
| 47980 |
+
{
|
| 47981 |
+
"epoch": 0.9313168572482479,
|
| 47982 |
+
"grad_norm": 2.7132720947265625,
|
| 47983 |
+
"learning_rate": 2.854835501352615e-07,
|
| 47984 |
+
"loss": 0.0797,
|
| 47985 |
+
"step": 6312
|
| 47986 |
+
},
|
| 47987 |
+
{
|
| 47988 |
+
"epoch": 0.9314644042788639,
|
| 47989 |
+
"grad_norm": 2.3825271129608154,
|
| 47990 |
+
"learning_rate": 2.842628287049498e-07,
|
| 47991 |
+
"loss": 0.0573,
|
| 47992 |
+
"step": 6313
|
| 47993 |
+
},
|
| 47994 |
+
{
|
| 47995 |
+
"epoch": 0.9316119513094799,
|
| 47996 |
+
"grad_norm": 4.8826680183410645,
|
| 47997 |
+
"learning_rate": 2.830446851281021e-07,
|
| 47998 |
+
"loss": 0.0617,
|
| 47999 |
+
"step": 6314
|
| 48000 |
+
},
|
| 48001 |
+
{
|
| 48002 |
+
"epoch": 0.9317594983400959,
|
| 48003 |
+
"grad_norm": 1.6326963901519775,
|
| 48004 |
+
"learning_rate": 2.818291197279277e-07,
|
| 48005 |
+
"loss": 0.0433,
|
| 48006 |
+
"step": 6315
|
| 48007 |
+
},
|
| 48008 |
+
{
|
| 48009 |
+
"epoch": 0.9319070453707119,
|
| 48010 |
+
"grad_norm": 4.223081588745117,
|
| 48011 |
+
"learning_rate": 2.806161328269508e-07,
|
| 48012 |
+
"loss": 0.0342,
|
| 48013 |
+
"step": 6316
|
| 48014 |
+
},
|
| 48015 |
+
{
|
| 48016 |
+
"epoch": 0.9320545924013279,
|
| 48017 |
+
"grad_norm": 1.6812433004379272,
|
| 48018 |
+
"learning_rate": 2.7940572474700724e-07,
|
| 48019 |
+
"loss": 0.0363,
|
| 48020 |
+
"step": 6317
|
| 48021 |
+
},
|
| 48022 |
+
{
|
| 48023 |
+
"epoch": 0.932202139431944,
|
| 48024 |
+
"grad_norm": 1.4430614709854126,
|
| 48025 |
+
"learning_rate": 2.781978958092535e-07,
|
| 48026 |
+
"loss": 0.0376,
|
| 48027 |
+
"step": 6318
|
| 48028 |
+
},
|
| 48029 |
+
{
|
| 48030 |
+
"epoch": 0.9323496864625599,
|
| 48031 |
+
"grad_norm": 1.3855177164077759,
|
| 48032 |
+
"learning_rate": 2.769926463341599e-07,
|
| 48033 |
+
"loss": 0.0322,
|
| 48034 |
+
"step": 6319
|
| 48035 |
+
},
|
| 48036 |
+
{
|
| 48037 |
+
"epoch": 0.9324972334931759,
|
| 48038 |
+
"grad_norm": 0.8598216772079468,
|
| 48039 |
+
"learning_rate": 2.7578997664151176e-07,
|
| 48040 |
+
"loss": 0.0238,
|
| 48041 |
+
"step": 6320
|
| 48042 |
+
},
|
| 48043 |
+
{
|
| 48044 |
+
"epoch": 0.9324972334931759,
|
| 48045 |
+
"eval_accuracy": 0.9782923299565847,
|
| 48046 |
+
"eval_f1": 0.9629629629629629,
|
| 48047 |
+
"eval_loss": 0.05572595074772835,
|
| 48048 |
+
"eval_precision": 0.9798994974874372,
|
| 48049 |
+
"eval_recall": 0.9466019417475728,
|
| 48050 |
+
"eval_runtime": 49.1318,
|
| 48051 |
+
"eval_samples_per_second": 5.923,
|
| 48052 |
+
"eval_steps_per_second": 0.204,
|
| 48053 |
+
"step": 6320
|
| 48054 |
+
},
|
| 48055 |
+
{
|
| 48056 |
+
"epoch": 0.932644780523792,
|
| 48057 |
+
"grad_norm": 2.7048840522766113,
|
| 48058 |
+
"learning_rate": 2.745898870504116e-07,
|
| 48059 |
+
"loss": 0.1049,
|
| 48060 |
+
"step": 6321
|
| 48061 |
+
},
|
| 48062 |
+
{
|
| 48063 |
+
"epoch": 0.932792327554408,
|
| 48064 |
+
"grad_norm": 3.241443634033203,
|
| 48065 |
+
"learning_rate": 2.733923778792769e-07,
|
| 48066 |
+
"loss": 0.119,
|
| 48067 |
+
"step": 6322
|
| 48068 |
+
},
|
| 48069 |
+
{
|
| 48070 |
+
"epoch": 0.932939874585024,
|
| 48071 |
+
"grad_norm": 3.209383249282837,
|
| 48072 |
+
"learning_rate": 2.721974494458368e-07,
|
| 48073 |
+
"loss": 0.0777,
|
| 48074 |
+
"step": 6323
|
| 48075 |
+
},
|
| 48076 |
+
{
|
| 48077 |
+
"epoch": 0.93308742161564,
|
| 48078 |
+
"grad_norm": 3.865638494491577,
|
| 48079 |
+
"learning_rate": 2.7100510206714225e-07,
|
| 48080 |
+
"loss": 0.0896,
|
| 48081 |
+
"step": 6324
|
| 48082 |
+
},
|
| 48083 |
+
{
|
| 48084 |
+
"epoch": 0.933234968646256,
|
| 48085 |
+
"grad_norm": 3.3464627265930176,
|
| 48086 |
+
"learning_rate": 2.6981533605955455e-07,
|
| 48087 |
+
"loss": 0.046,
|
| 48088 |
+
"step": 6325
|
| 48089 |
+
},
|
| 48090 |
+
{
|
| 48091 |
+
"epoch": 0.933382515676872,
|
| 48092 |
+
"grad_norm": 1.673051118850708,
|
| 48093 |
+
"learning_rate": 2.686281517387501e-07,
|
| 48094 |
+
"loss": 0.058,
|
| 48095 |
+
"step": 6326
|
| 48096 |
+
},
|
| 48097 |
+
{
|
| 48098 |
+
"epoch": 0.9335300627074881,
|
| 48099 |
+
"grad_norm": 1.3582005500793457,
|
| 48100 |
+
"learning_rate": 2.674435494197247e-07,
|
| 48101 |
+
"loss": 0.023,
|
| 48102 |
+
"step": 6327
|
| 48103 |
+
},
|
| 48104 |
+
{
|
| 48105 |
+
"epoch": 0.933677609738104,
|
| 48106 |
+
"grad_norm": 1.89664626121521,
|
| 48107 |
+
"learning_rate": 2.662615294167836e-07,
|
| 48108 |
+
"loss": 0.0617,
|
| 48109 |
+
"step": 6328
|
| 48110 |
+
},
|
| 48111 |
+
{
|
| 48112 |
+
"epoch": 0.93382515676872,
|
| 48113 |
+
"grad_norm": 1.3908201456069946,
|
| 48114 |
+
"learning_rate": 2.650820920435493e-07,
|
| 48115 |
+
"loss": 0.0196,
|
| 48116 |
+
"step": 6329
|
| 48117 |
+
},
|
| 48118 |
+
{
|
| 48119 |
+
"epoch": 0.9339727037993361,
|
| 48120 |
+
"grad_norm": 1.5495145320892334,
|
| 48121 |
+
"learning_rate": 2.639052376129614e-07,
|
| 48122 |
+
"loss": 0.0203,
|
| 48123 |
+
"step": 6330
|
| 48124 |
+
},
|
| 48125 |
+
{
|
| 48126 |
+
"epoch": 0.9341202508299521,
|
| 48127 |
+
"grad_norm": 4.128423690795898,
|
| 48128 |
+
"learning_rate": 2.6273096643727015e-07,
|
| 48129 |
+
"loss": 0.0518,
|
| 48130 |
+
"step": 6331
|
| 48131 |
+
},
|
| 48132 |
+
{
|
| 48133 |
+
"epoch": 0.934267797860568,
|
| 48134 |
+
"grad_norm": 1.1344972848892212,
|
| 48135 |
+
"learning_rate": 2.61559278828043e-07,
|
| 48136 |
+
"loss": 0.0158,
|
| 48137 |
+
"step": 6332
|
| 48138 |
+
},
|
| 48139 |
+
{
|
| 48140 |
+
"epoch": 0.934415344891184,
|
| 48141 |
+
"grad_norm": 3.8028311729431152,
|
| 48142 |
+
"learning_rate": 2.603901750961602e-07,
|
| 48143 |
+
"loss": 0.0839,
|
| 48144 |
+
"step": 6333
|
| 48145 |
+
},
|
| 48146 |
+
{
|
| 48147 |
+
"epoch": 0.9345628919218001,
|
| 48148 |
+
"grad_norm": 1.9522721767425537,
|
| 48149 |
+
"learning_rate": 2.5922365555181686e-07,
|
| 48150 |
+
"loss": 0.0402,
|
| 48151 |
+
"step": 6334
|
| 48152 |
+
},
|
| 48153 |
+
{
|
| 48154 |
+
"epoch": 0.9347104389524161,
|
| 48155 |
+
"grad_norm": 2.5471351146698,
|
| 48156 |
+
"learning_rate": 2.5805972050452434e-07,
|
| 48157 |
+
"loss": 0.0524,
|
| 48158 |
+
"step": 6335
|
| 48159 |
+
},
|
| 48160 |
+
{
|
| 48161 |
+
"epoch": 0.934857985983032,
|
| 48162 |
+
"grad_norm": 6.653663158416748,
|
| 48163 |
+
"learning_rate": 2.568983702631067e-07,
|
| 48164 |
+
"loss": 0.1402,
|
| 48165 |
+
"step": 6336
|
| 48166 |
+
},
|
| 48167 |
+
{
|
| 48168 |
+
"epoch": 0.9350055330136481,
|
| 48169 |
+
"grad_norm": 0.4973243176937103,
|
| 48170 |
+
"learning_rate": 2.5573960513570085e-07,
|
| 48171 |
+
"loss": 0.0042,
|
| 48172 |
+
"step": 6337
|
| 48173 |
+
},
|
| 48174 |
+
{
|
| 48175 |
+
"epoch": 0.9351530800442641,
|
| 48176 |
+
"grad_norm": 2.1447861194610596,
|
| 48177 |
+
"learning_rate": 2.5458342542975855e-07,
|
| 48178 |
+
"loss": 0.0523,
|
| 48179 |
+
"step": 6338
|
| 48180 |
+
},
|
| 48181 |
+
{
|
| 48182 |
+
"epoch": 0.9353006270748802,
|
| 48183 |
+
"grad_norm": 2.394932985305786,
|
| 48184 |
+
"learning_rate": 2.5342983145205003e-07,
|
| 48185 |
+
"loss": 0.0951,
|
| 48186 |
+
"step": 6339
|
| 48187 |
+
},
|
| 48188 |
+
{
|
| 48189 |
+
"epoch": 0.9354481741054961,
|
| 48190 |
+
"grad_norm": 3.6239054203033447,
|
| 48191 |
+
"learning_rate": 2.5227882350865154e-07,
|
| 48192 |
+
"loss": 0.0703,
|
| 48193 |
+
"step": 6340
|
| 48194 |
+
},
|
| 48195 |
+
{
|
| 48196 |
+
"epoch": 0.9354481741054961,
|
| 48197 |
+
"eval_accuracy": 0.9782923299565847,
|
| 48198 |
+
"eval_f1": 0.9629629629629629,
|
| 48199 |
+
"eval_loss": 0.05495457723736763,
|
| 48200 |
+
"eval_precision": 0.9798994974874372,
|
| 48201 |
+
"eval_recall": 0.9466019417475728,
|
| 48202 |
+
"eval_runtime": 48.8024,
|
| 48203 |
+
"eval_samples_per_second": 5.963,
|
| 48204 |
+
"eval_steps_per_second": 0.205,
|
| 48205 |
+
"step": 6340
|
| 48206 |
+
},
|
| 48207 |
+
{
|
| 48208 |
+
"epoch": 0.9355957211361121,
|
| 48209 |
+
"grad_norm": 1.973219394683838,
|
| 48210 |
+
"learning_rate": 2.5113040190495986e-07,
|
| 48211 |
+
"loss": 0.0354,
|
| 48212 |
+
"step": 6341
|
| 48213 |
+
},
|
| 48214 |
+
{
|
| 48215 |
+
"epoch": 0.9357432681667281,
|
| 48216 |
+
"grad_norm": 4.711592674255371,
|
| 48217 |
+
"learning_rate": 2.4998456694568016e-07,
|
| 48218 |
+
"loss": 0.0819,
|
| 48219 |
+
"step": 6342
|
| 48220 |
+
},
|
| 48221 |
+
{
|
| 48222 |
+
"epoch": 0.9358908151973442,
|
| 48223 |
+
"grad_norm": 1.5042883157730103,
|
| 48224 |
+
"learning_rate": 2.488413189348371e-07,
|
| 48225 |
+
"loss": 0.0336,
|
| 48226 |
+
"step": 6343
|
| 48227 |
+
},
|
| 48228 |
+
{
|
| 48229 |
+
"epoch": 0.9360383622279601,
|
| 48230 |
+
"grad_norm": 3.2596170902252197,
|
| 48231 |
+
"learning_rate": 2.477006581757657e-07,
|
| 48232 |
+
"loss": 0.0782,
|
| 48233 |
+
"step": 6344
|
| 48234 |
+
},
|
| 48235 |
+
{
|
| 48236 |
+
"epoch": 0.9361859092585761,
|
| 48237 |
+
"grad_norm": 3.104275703430176,
|
| 48238 |
+
"learning_rate": 2.4656258497111285e-07,
|
| 48239 |
+
"loss": 0.0566,
|
| 48240 |
+
"step": 6345
|
| 48241 |
+
},
|
| 48242 |
+
{
|
| 48243 |
+
"epoch": 0.9363334562891922,
|
| 48244 |
+
"grad_norm": 1.263534426689148,
|
| 48245 |
+
"learning_rate": 2.454270996228425e-07,
|
| 48246 |
+
"loss": 0.0165,
|
| 48247 |
+
"step": 6346
|
| 48248 |
+
},
|
| 48249 |
+
{
|
| 48250 |
+
"epoch": 0.9364810033198082,
|
| 48251 |
+
"grad_norm": 1.779310941696167,
|
| 48252 |
+
"learning_rate": 2.4429420243222924e-07,
|
| 48253 |
+
"loss": 0.0441,
|
| 48254 |
+
"step": 6347
|
| 48255 |
+
},
|
| 48256 |
+
{
|
| 48257 |
+
"epoch": 0.9366285503504242,
|
| 48258 |
+
"grad_norm": 2.7920477390289307,
|
| 48259 |
+
"learning_rate": 2.431638936998615e-07,
|
| 48260 |
+
"loss": 0.0906,
|
| 48261 |
+
"step": 6348
|
| 48262 |
+
},
|
| 48263 |
+
{
|
| 48264 |
+
"epoch": 0.9367760973810402,
|
| 48265 |
+
"grad_norm": 3.6723668575286865,
|
| 48266 |
+
"learning_rate": 2.420361737256438e-07,
|
| 48267 |
+
"loss": 0.0574,
|
| 48268 |
+
"step": 6349
|
| 48269 |
+
},
|
| 48270 |
+
{
|
| 48271 |
+
"epoch": 0.9369236444116562,
|
| 48272 |
+
"grad_norm": 1.9342572689056396,
|
| 48273 |
+
"learning_rate": 2.4091104280878906e-07,
|
| 48274 |
+
"loss": 0.0695,
|
| 48275 |
+
"step": 6350
|
| 48276 |
+
},
|
| 48277 |
+
{
|
| 48278 |
+
"epoch": 0.9370711914422722,
|
| 48279 |
+
"grad_norm": 1.4174875020980835,
|
| 48280 |
+
"learning_rate": 2.3978850124782736e-07,
|
| 48281 |
+
"loss": 0.023,
|
| 48282 |
+
"step": 6351
|
| 48283 |
+
},
|
| 48284 |
+
{
|
| 48285 |
+
"epoch": 0.9372187384728883,
|
| 48286 |
+
"grad_norm": 4.9079389572143555,
|
| 48287 |
+
"learning_rate": 2.3866854934059823e-07,
|
| 48288 |
+
"loss": 0.0569,
|
| 48289 |
+
"step": 6352
|
| 48290 |
+
},
|
| 48291 |
+
{
|
| 48292 |
+
"epoch": 0.9373662855035042,
|
| 48293 |
+
"grad_norm": 1.6540197134017944,
|
| 48294 |
+
"learning_rate": 2.375511873842562e-07,
|
| 48295 |
+
"loss": 0.0381,
|
| 48296 |
+
"step": 6353
|
| 48297 |
+
},
|
| 48298 |
+
{
|
| 48299 |
+
"epoch": 0.9375138325341202,
|
| 48300 |
+
"grad_norm": 4.948291301727295,
|
| 48301 |
+
"learning_rate": 2.3643641567526966e-07,
|
| 48302 |
+
"loss": 0.0529,
|
| 48303 |
+
"step": 6354
|
| 48304 |
+
},
|
| 48305 |
+
{
|
| 48306 |
+
"epoch": 0.9376613795647363,
|
| 48307 |
+
"grad_norm": 2.5287394523620605,
|
| 48308 |
+
"learning_rate": 2.3532423450941755e-07,
|
| 48309 |
+
"loss": 0.1042,
|
| 48310 |
+
"step": 6355
|
| 48311 |
+
},
|
| 48312 |
+
{
|
| 48313 |
+
"epoch": 0.9378089265953523,
|
| 48314 |
+
"grad_norm": 2.72228741645813,
|
| 48315 |
+
"learning_rate": 2.3421464418179163e-07,
|
| 48316 |
+
"loss": 0.0786,
|
| 48317 |
+
"step": 6356
|
| 48318 |
+
},
|
| 48319 |
+
{
|
| 48320 |
+
"epoch": 0.9379564736259682,
|
| 48321 |
+
"grad_norm": 0.9982196092605591,
|
| 48322 |
+
"learning_rate": 2.331076449867975e-07,
|
| 48323 |
+
"loss": 0.0208,
|
| 48324 |
+
"step": 6357
|
| 48325 |
+
},
|
| 48326 |
+
{
|
| 48327 |
+
"epoch": 0.9381040206565843,
|
| 48328 |
+
"grad_norm": 2.173036575317383,
|
| 48329 |
+
"learning_rate": 2.3200323721815244e-07,
|
| 48330 |
+
"loss": 0.0312,
|
| 48331 |
+
"step": 6358
|
| 48332 |
+
},
|
| 48333 |
+
{
|
| 48334 |
+
"epoch": 0.9382515676872003,
|
| 48335 |
+
"grad_norm": 2.706411123275757,
|
| 48336 |
+
"learning_rate": 2.309014211688865e-07,
|
| 48337 |
+
"loss": 0.0505,
|
| 48338 |
+
"step": 6359
|
| 48339 |
+
},
|
| 48340 |
+
{
|
| 48341 |
+
"epoch": 0.9383991147178163,
|
| 48342 |
+
"grad_norm": 2.960676908493042,
|
| 48343 |
+
"learning_rate": 2.2980219713134133e-07,
|
| 48344 |
+
"loss": 0.0772,
|
| 48345 |
+
"step": 6360
|
| 48346 |
+
},
|
| 48347 |
+
{
|
| 48348 |
+
"epoch": 0.9383991147178163,
|
| 48349 |
+
"eval_accuracy": 0.9782923299565847,
|
| 48350 |
+
"eval_f1": 0.9629629629629629,
|
| 48351 |
+
"eval_loss": 0.055578552186489105,
|
| 48352 |
+
"eval_precision": 0.9798994974874372,
|
| 48353 |
+
"eval_recall": 0.9466019417475728,
|
| 48354 |
+
"eval_runtime": 49.2114,
|
| 48355 |
+
"eval_samples_per_second": 5.913,
|
| 48356 |
+
"eval_steps_per_second": 0.203,
|
| 48357 |
+
"step": 6360
|
| 48358 |
+
},
|
| 48359 |
+
{
|
| 48360 |
+
"epoch": 0.9385466617484323,
|
| 48361 |
+
"grad_norm": 2.327997922897339,
|
| 48362 |
+
"learning_rate": 2.2870556539717258e-07,
|
| 48363 |
+
"loss": 0.0591,
|
| 48364 |
+
"step": 6361
|
| 48365 |
+
},
|
| 48366 |
+
{
|
| 48367 |
+
"epoch": 0.9386942087790483,
|
| 48368 |
+
"grad_norm": 1.6461297273635864,
|
| 48369 |
+
"learning_rate": 2.2761152625734527e-07,
|
| 48370 |
+
"loss": 0.0382,
|
| 48371 |
+
"step": 6362
|
| 48372 |
+
},
|
| 48373 |
+
{
|
| 48374 |
+
"epoch": 0.9388417558096643,
|
| 48375 |
+
"grad_norm": 1.021716594696045,
|
| 48376 |
+
"learning_rate": 2.2652008000214055e-07,
|
| 48377 |
+
"loss": 0.0326,
|
| 48378 |
+
"step": 6363
|
| 48379 |
+
},
|
| 48380 |
+
{
|
| 48381 |
+
"epoch": 0.9389893028402804,
|
| 48382 |
+
"grad_norm": 1.242136001586914,
|
| 48383 |
+
"learning_rate": 2.2543122692114672e-07,
|
| 48384 |
+
"loss": 0.0328,
|
| 48385 |
+
"step": 6364
|
| 48386 |
+
},
|
| 48387 |
+
{
|
| 48388 |
+
"epoch": 0.9391368498708963,
|
| 48389 |
+
"grad_norm": 1.448472499847412,
|
| 48390 |
+
"learning_rate": 2.2434496730326937e-07,
|
| 48391 |
+
"loss": 0.0381,
|
| 48392 |
+
"step": 6365
|
| 48393 |
+
},
|
| 48394 |
+
{
|
| 48395 |
+
"epoch": 0.9392843969015123,
|
| 48396 |
+
"grad_norm": 3.8739380836486816,
|
| 48397 |
+
"learning_rate": 2.2326130143671908e-07,
|
| 48398 |
+
"loss": 0.046,
|
| 48399 |
+
"step": 6366
|
| 48400 |
+
},
|
| 48401 |
+
{
|
| 48402 |
+
"epoch": 0.9394319439321284,
|
| 48403 |
+
"grad_norm": 2.4928221702575684,
|
| 48404 |
+
"learning_rate": 2.2218022960902696e-07,
|
| 48405 |
+
"loss": 0.0498,
|
| 48406 |
+
"step": 6367
|
| 48407 |
+
},
|
| 48408 |
+
{
|
| 48409 |
+
"epoch": 0.9395794909627444,
|
| 48410 |
+
"grad_norm": 2.8968076705932617,
|
| 48411 |
+
"learning_rate": 2.21101752107028e-07,
|
| 48412 |
+
"loss": 0.0644,
|
| 48413 |
+
"step": 6368
|
| 48414 |
+
},
|
| 48415 |
+
{
|
| 48416 |
+
"epoch": 0.9397270379933604,
|
| 48417 |
+
"grad_norm": 3.011594295501709,
|
| 48418 |
+
"learning_rate": 2.200258692168744e-07,
|
| 48419 |
+
"loss": 0.1531,
|
| 48420 |
+
"step": 6369
|
| 48421 |
+
},
|
| 48422 |
+
{
|
| 48423 |
+
"epoch": 0.9398745850239764,
|
| 48424 |
+
"grad_norm": 1.3657293319702148,
|
| 48425 |
+
"learning_rate": 2.1895258122402563e-07,
|
| 48426 |
+
"loss": 0.0265,
|
| 48427 |
+
"step": 6370
|
| 48428 |
+
},
|
| 48429 |
+
{
|
| 48430 |
+
"epoch": 0.9400221320545924,
|
| 48431 |
+
"grad_norm": 1.2389910221099854,
|
| 48432 |
+
"learning_rate": 2.1788188841325497e-07,
|
| 48433 |
+
"loss": 0.0218,
|
| 48434 |
+
"step": 6371
|
| 48435 |
+
},
|
| 48436 |
+
{
|
| 48437 |
+
"epoch": 0.9401696790852084,
|
| 48438 |
+
"grad_norm": 8.222478866577148,
|
| 48439 |
+
"learning_rate": 2.1681379106864853e-07,
|
| 48440 |
+
"loss": 0.0657,
|
| 48441 |
+
"step": 6372
|
| 48442 |
+
},
|
| 48443 |
+
{
|
| 48444 |
+
"epoch": 0.9403172261158245,
|
| 48445 |
+
"grad_norm": 2.385483980178833,
|
| 48446 |
+
"learning_rate": 2.157482894735996e-07,
|
| 48447 |
+
"loss": 0.016,
|
| 48448 |
+
"step": 6373
|
| 48449 |
+
},
|
| 48450 |
+
{
|
| 48451 |
+
"epoch": 0.9404647731464404,
|
| 48452 |
+
"grad_norm": 2.611680507659912,
|
| 48453 |
+
"learning_rate": 2.146853839108165e-07,
|
| 48454 |
+
"loss": 0.0888,
|
| 48455 |
+
"step": 6374
|
| 48456 |
+
},
|
| 48457 |
+
{
|
| 48458 |
+
"epoch": 0.9406123201770564,
|
| 48459 |
+
"grad_norm": 3.23009991645813,
|
| 48460 |
+
"learning_rate": 2.1362507466231808e-07,
|
| 48461 |
+
"loss": 0.0801,
|
| 48462 |
+
"step": 6375
|
| 48463 |
+
},
|
| 48464 |
+
{
|
| 48465 |
+
"epoch": 0.9407598672076725,
|
| 48466 |
+
"grad_norm": 0.6852890849113464,
|
| 48467 |
+
"learning_rate": 2.1256736200943152e-07,
|
| 48468 |
+
"loss": 0.0077,
|
| 48469 |
+
"step": 6376
|
| 48470 |
+
},
|
| 48471 |
+
{
|
| 48472 |
+
"epoch": 0.9409074142382885,
|
| 48473 |
+
"grad_norm": 0.5792398452758789,
|
| 48474 |
+
"learning_rate": 2.1151224623280008e-07,
|
| 48475 |
+
"loss": 0.0139,
|
| 48476 |
+
"step": 6377
|
| 48477 |
+
},
|
| 48478 |
+
{
|
| 48479 |
+
"epoch": 0.9410549612689044,
|
| 48480 |
+
"grad_norm": 1.1180408000946045,
|
| 48481 |
+
"learning_rate": 2.104597276123721e-07,
|
| 48482 |
+
"loss": 0.0198,
|
| 48483 |
+
"step": 6378
|
| 48484 |
+
},
|
| 48485 |
+
{
|
| 48486 |
+
"epoch": 0.9412025082995205,
|
| 48487 |
+
"grad_norm": 2.6714653968811035,
|
| 48488 |
+
"learning_rate": 2.0940980642741304e-07,
|
| 48489 |
+
"loss": 0.0531,
|
| 48490 |
+
"step": 6379
|
| 48491 |
+
},
|
| 48492 |
+
{
|
| 48493 |
+
"epoch": 0.9413500553301365,
|
| 48494 |
+
"grad_norm": 4.037075519561768,
|
| 48495 |
+
"learning_rate": 2.0836248295649342e-07,
|
| 48496 |
+
"loss": 0.0697,
|
| 48497 |
+
"step": 6380
|
| 48498 |
+
},
|
| 48499 |
+
{
|
| 48500 |
+
"epoch": 0.9413500553301365,
|
| 48501 |
+
"eval_accuracy": 0.9782923299565847,
|
| 48502 |
+
"eval_f1": 0.9629629629629629,
|
| 48503 |
+
"eval_loss": 0.05569841340184212,
|
| 48504 |
+
"eval_precision": 0.9798994974874372,
|
| 48505 |
+
"eval_recall": 0.9466019417475728,
|
| 48506 |
+
"eval_runtime": 49.1432,
|
| 48507 |
+
"eval_samples_per_second": 5.921,
|
| 48508 |
+
"eval_steps_per_second": 0.203,
|
| 48509 |
+
"step": 6380
|
| 48510 |
+
},
|
| 48511 |
+
{
|
| 48512 |
+
"epoch": 0.9414976023607525,
|
| 48513 |
+
"grad_norm": 1.3099905252456665,
|
| 48514 |
+
"learning_rate": 2.0731775747749761e-07,
|
| 48515 |
+
"loss": 0.0283,
|
| 48516 |
+
"step": 6381
|
| 48517 |
+
},
|
| 48518 |
+
{
|
| 48519 |
+
"epoch": 0.9416451493913685,
|
| 48520 |
+
"grad_norm": 3.0591821670532227,
|
| 48521 |
+
"learning_rate": 2.0627563026762053e-07,
|
| 48522 |
+
"loss": 0.0455,
|
| 48523 |
+
"step": 6382
|
| 48524 |
+
},
|
| 48525 |
+
{
|
| 48526 |
+
"epoch": 0.9417926964219845,
|
| 48527 |
+
"grad_norm": 1.2957801818847656,
|
| 48528 |
+
"learning_rate": 2.0523610160336883e-07,
|
| 48529 |
+
"loss": 0.0367,
|
| 48530 |
+
"step": 6383
|
| 48531 |
+
},
|
| 48532 |
+
{
|
| 48533 |
+
"epoch": 0.9419402434526005,
|
| 48534 |
+
"grad_norm": 4.634521007537842,
|
| 48535 |
+
"learning_rate": 2.0419917176055514e-07,
|
| 48536 |
+
"loss": 0.05,
|
| 48537 |
+
"step": 6384
|
| 48538 |
+
},
|
| 48539 |
+
{
|
| 48540 |
+
"epoch": 0.9420877904832166,
|
| 48541 |
+
"grad_norm": 2.9300975799560547,
|
| 48542 |
+
"learning_rate": 2.0316484101430722e-07,
|
| 48543 |
+
"loss": 0.0338,
|
| 48544 |
+
"step": 6385
|
| 48545 |
+
},
|
| 48546 |
+
{
|
| 48547 |
+
"epoch": 0.9422353375138325,
|
| 48548 |
+
"grad_norm": 2.606213092803955,
|
| 48549 |
+
"learning_rate": 2.021331096390622e-07,
|
| 48550 |
+
"loss": 0.0643,
|
| 48551 |
+
"step": 6386
|
| 48552 |
+
},
|
| 48553 |
+
{
|
| 48554 |
+
"epoch": 0.9423828845444485,
|
| 48555 |
+
"grad_norm": 0.7693409323692322,
|
| 48556 |
+
"learning_rate": 2.0110397790856552e-07,
|
| 48557 |
+
"loss": 0.0224,
|
| 48558 |
+
"step": 6387
|
| 48559 |
+
},
|
| 48560 |
+
{
|
| 48561 |
+
"epoch": 0.9425304315750646,
|
| 48562 |
+
"grad_norm": 3.6793739795684814,
|
| 48563 |
+
"learning_rate": 2.0007744609587542e-07,
|
| 48564 |
+
"loss": 0.0514,
|
| 48565 |
+
"step": 6388
|
| 48566 |
+
},
|
| 48567 |
+
{
|
| 48568 |
+
"epoch": 0.9426779786056806,
|
| 48569 |
+
"grad_norm": 1.294873595237732,
|
| 48570 |
+
"learning_rate": 1.9905351447335742e-07,
|
| 48571 |
+
"loss": 0.0233,
|
| 48572 |
+
"step": 6389
|
| 48573 |
+
},
|
| 48574 |
+
{
|
| 48575 |
+
"epoch": 0.9428255256362966,
|
| 48576 |
+
"grad_norm": 1.8262622356414795,
|
| 48577 |
+
"learning_rate": 1.980321833126908e-07,
|
| 48578 |
+
"loss": 0.075,
|
| 48579 |
+
"step": 6390
|
| 48580 |
+
},
|
| 48581 |
+
{
|
| 48582 |
+
"epoch": 0.9429730726669125,
|
| 48583 |
+
"grad_norm": 2.2138702869415283,
|
| 48584 |
+
"learning_rate": 1.9701345288486328e-07,
|
| 48585 |
+
"loss": 0.0569,
|
| 48586 |
+
"step": 6391
|
| 48587 |
+
},
|
| 48588 |
+
{
|
| 48589 |
+
"epoch": 0.9431206196975286,
|
| 48590 |
+
"grad_norm": 1.8159193992614746,
|
| 48591 |
+
"learning_rate": 1.9599732346016974e-07,
|
| 48592 |
+
"loss": 0.0345,
|
| 48593 |
+
"step": 6392
|
| 48594 |
+
},
|
| 48595 |
+
{
|
| 48596 |
+
"epoch": 0.9432681667281446,
|
| 48597 |
+
"grad_norm": 3.349161386489868,
|
| 48598 |
+
"learning_rate": 1.949837953082201e-07,
|
| 48599 |
+
"loss": 0.1248,
|
| 48600 |
+
"step": 6393
|
| 48601 |
+
},
|
| 48602 |
+
{
|
| 48603 |
+
"epoch": 0.9434157137587607,
|
| 48604 |
+
"grad_norm": 1.3875066041946411,
|
| 48605 |
+
"learning_rate": 1.939728686979292e-07,
|
| 48606 |
+
"loss": 0.012,
|
| 48607 |
+
"step": 6394
|
| 48608 |
+
},
|
| 48609 |
+
{
|
| 48610 |
+
"epoch": 0.9435632607893766,
|
| 48611 |
+
"grad_norm": 1.954710602760315,
|
| 48612 |
+
"learning_rate": 1.9296454389752362e-07,
|
| 48613 |
+
"loss": 0.0378,
|
| 48614 |
+
"step": 6395
|
| 48615 |
+
},
|
| 48616 |
+
{
|
| 48617 |
+
"epoch": 0.9437108078199926,
|
| 48618 |
+
"grad_norm": 3.135584592819214,
|
| 48619 |
+
"learning_rate": 1.9195882117454267e-07,
|
| 48620 |
+
"loss": 0.1147,
|
| 48621 |
+
"step": 6396
|
| 48622 |
+
},
|
| 48623 |
+
{
|
| 48624 |
+
"epoch": 0.9438583548506086,
|
| 48625 |
+
"grad_norm": 2.1549274921417236,
|
| 48626 |
+
"learning_rate": 1.909557007958307e-07,
|
| 48627 |
+
"loss": 0.0458,
|
| 48628 |
+
"step": 6397
|
| 48629 |
+
},
|
| 48630 |
+
{
|
| 48631 |
+
"epoch": 0.9440059018812247,
|
| 48632 |
+
"grad_norm": 3.548346519470215,
|
| 48633 |
+
"learning_rate": 1.8995518302754145e-07,
|
| 48634 |
+
"loss": 0.0478,
|
| 48635 |
+
"step": 6398
|
| 48636 |
+
},
|
| 48637 |
+
{
|
| 48638 |
+
"epoch": 0.9441534489118406,
|
| 48639 |
+
"grad_norm": 1.0740330219268799,
|
| 48640 |
+
"learning_rate": 1.8895726813514258e-07,
|
| 48641 |
+
"loss": 0.0244,
|
| 48642 |
+
"step": 6399
|
| 48643 |
+
},
|
| 48644 |
+
{
|
| 48645 |
+
"epoch": 0.9443009959424566,
|
| 48646 |
+
"grad_norm": 3.005735397338867,
|
| 48647 |
+
"learning_rate": 1.8796195638340676e-07,
|
| 48648 |
+
"loss": 0.0446,
|
| 48649 |
+
"step": 6400
|
| 48650 |
+
},
|
| 48651 |
+
{
|
| 48652 |
+
"epoch": 0.9443009959424566,
|
| 48653 |
+
"eval_accuracy": 0.9782923299565847,
|
| 48654 |
+
"eval_f1": 0.9629629629629629,
|
| 48655 |
+
"eval_loss": 0.05581614002585411,
|
| 48656 |
+
"eval_precision": 0.9798994974874372,
|
| 48657 |
+
"eval_recall": 0.9466019417475728,
|
| 48658 |
+
"eval_runtime": 49.0804,
|
| 48659 |
+
"eval_samples_per_second": 5.929,
|
| 48660 |
+
"eval_steps_per_second": 0.204,
|
| 48661 |
+
"step": 6400
|
| 48662 |
}
|
| 48663 |
],
|
| 48664 |
"logging_steps": 1,
|
|
|
|
| 48678 |
"attributes": {}
|
| 48679 |
}
|
| 48680 |
},
|
| 48681 |
+
"total_flos": 1.9720278275952476e+18,
|
| 48682 |
"train_batch_size": 8,
|
| 48683 |
"trial_name": null,
|
| 48684 |
"trial_params": null
|