Commit
·
e1df800
1
Parent(s):
829cbeb
Upload folder using huggingface_hub
Browse files- .ipynb_checkpoints/trainer_state-checkpoint.json +0 -0
- pytorch_model-00001-of-00002.bin +1 -1
- pytorch_model-00002-of-00002.bin +1 -1
- rng_state_0.pth +1 -1
- rng_state_1.pth +1 -1
- rng_state_2.pth +1 -1
- rng_state_3.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +1803 -3
.ipynb_checkpoints/trainer_state-checkpoint.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pytorch_model-00001-of-00002.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9449597278
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2a22a59a791bc5836ca236e2bfc50cd0da7344d77f3ef42ef59f6210ef899e2
|
| 3 |
size 9449597278
|
pytorch_model-00002-of-00002.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1949353379
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e12c66b8e32114d61e53e1d9c1380cc7a253a8fcc709550fee6d12887a39ded
|
| 3 |
size 1949353379
|
rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4403f335fe3bcfa2bb3442812c81ed28a3c794a53ab3823c8adf65f0e19d6715
|
| 3 |
size 14583
|
rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:921a379f78523ffaeb8e7ffb1bc2889efcb776efb9a8d8b6ec45f71a9421df8a
|
| 3 |
size 14583
|
rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e8383a23c1ee676550095ccbf05f3bb9ee719c8be4fbc85a348a3bb3eb17eb0
|
| 3 |
size 14583
|
rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d107caf2bca1d4f924e93b2ad57ba9de883cd592e5b9459b95858d6f161d781a
|
| 3 |
size 14583
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f7a2719e097124bb18a881ed98b7ce282df9ea30a1e6781ab0ed94992674765
|
| 3 |
size 627
|
trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -16206,11 +16206,1811 @@
|
|
| 16206 |
"learning_rate": 1.886133426475725e-05,
|
| 16207 |
"loss": 0.9238,
|
| 16208 |
"step": 2700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16209 |
}
|
| 16210 |
],
|
| 16211 |
"max_steps": 95425,
|
| 16212 |
"num_train_epochs": 25,
|
| 16213 |
-
"total_flos": 2.
|
| 16214 |
"trial_name": null,
|
| 16215 |
"trial_params": null
|
| 16216 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.7858546168958742,
|
| 5 |
+
"global_step": 3000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 16206 |
"learning_rate": 1.886133426475725e-05,
|
| 16207 |
"loss": 0.9238,
|
| 16208 |
"step": 2700
|
| 16209 |
+
},
|
| 16210 |
+
{
|
| 16211 |
+
"epoch": 0.71,
|
| 16212 |
+
"learning_rate": 1.8868319944114567e-05,
|
| 16213 |
+
"loss": 0.7629,
|
| 16214 |
+
"step": 2701
|
| 16215 |
+
},
|
| 16216 |
+
{
|
| 16217 |
+
"epoch": 0.71,
|
| 16218 |
+
"learning_rate": 1.8875305623471885e-05,
|
| 16219 |
+
"loss": 0.9511,
|
| 16220 |
+
"step": 2702
|
| 16221 |
+
},
|
| 16222 |
+
{
|
| 16223 |
+
"epoch": 0.71,
|
| 16224 |
+
"learning_rate": 1.8882291302829203e-05,
|
| 16225 |
+
"loss": 1.0266,
|
| 16226 |
+
"step": 2703
|
| 16227 |
+
},
|
| 16228 |
+
{
|
| 16229 |
+
"epoch": 0.71,
|
| 16230 |
+
"learning_rate": 1.888927698218652e-05,
|
| 16231 |
+
"loss": 0.7274,
|
| 16232 |
+
"step": 2704
|
| 16233 |
+
},
|
| 16234 |
+
{
|
| 16235 |
+
"epoch": 0.71,
|
| 16236 |
+
"learning_rate": 1.8896262661543835e-05,
|
| 16237 |
+
"loss": 0.863,
|
| 16238 |
+
"step": 2705
|
| 16239 |
+
},
|
| 16240 |
+
{
|
| 16241 |
+
"epoch": 0.71,
|
| 16242 |
+
"learning_rate": 1.8903248340901156e-05,
|
| 16243 |
+
"loss": 1.0641,
|
| 16244 |
+
"step": 2706
|
| 16245 |
+
},
|
| 16246 |
+
{
|
| 16247 |
+
"epoch": 0.71,
|
| 16248 |
+
"learning_rate": 1.891023402025847e-05,
|
| 16249 |
+
"loss": 0.9874,
|
| 16250 |
+
"step": 2707
|
| 16251 |
+
},
|
| 16252 |
+
{
|
| 16253 |
+
"epoch": 0.71,
|
| 16254 |
+
"learning_rate": 1.8917219699615788e-05,
|
| 16255 |
+
"loss": 0.8956,
|
| 16256 |
+
"step": 2708
|
| 16257 |
+
},
|
| 16258 |
+
{
|
| 16259 |
+
"epoch": 0.71,
|
| 16260 |
+
"learning_rate": 1.8924205378973106e-05,
|
| 16261 |
+
"loss": 0.9972,
|
| 16262 |
+
"step": 2709
|
| 16263 |
+
},
|
| 16264 |
+
{
|
| 16265 |
+
"epoch": 0.71,
|
| 16266 |
+
"learning_rate": 1.8931191058330424e-05,
|
| 16267 |
+
"loss": 0.9007,
|
| 16268 |
+
"step": 2710
|
| 16269 |
+
},
|
| 16270 |
+
{
|
| 16271 |
+
"epoch": 0.71,
|
| 16272 |
+
"learning_rate": 1.893817673768774e-05,
|
| 16273 |
+
"loss": 0.8585,
|
| 16274 |
+
"step": 2711
|
| 16275 |
+
},
|
| 16276 |
+
{
|
| 16277 |
+
"epoch": 0.71,
|
| 16278 |
+
"learning_rate": 1.894516241704506e-05,
|
| 16279 |
+
"loss": 1.0561,
|
| 16280 |
+
"step": 2712
|
| 16281 |
+
},
|
| 16282 |
+
{
|
| 16283 |
+
"epoch": 0.71,
|
| 16284 |
+
"learning_rate": 1.8952148096402377e-05,
|
| 16285 |
+
"loss": 0.8456,
|
| 16286 |
+
"step": 2713
|
| 16287 |
+
},
|
| 16288 |
+
{
|
| 16289 |
+
"epoch": 0.71,
|
| 16290 |
+
"learning_rate": 1.8959133775759695e-05,
|
| 16291 |
+
"loss": 0.7743,
|
| 16292 |
+
"step": 2714
|
| 16293 |
+
},
|
| 16294 |
+
{
|
| 16295 |
+
"epoch": 0.71,
|
| 16296 |
+
"learning_rate": 1.8966119455117013e-05,
|
| 16297 |
+
"loss": 1.0908,
|
| 16298 |
+
"step": 2715
|
| 16299 |
+
},
|
| 16300 |
+
{
|
| 16301 |
+
"epoch": 0.71,
|
| 16302 |
+
"learning_rate": 1.897310513447433e-05,
|
| 16303 |
+
"loss": 0.8193,
|
| 16304 |
+
"step": 2716
|
| 16305 |
+
},
|
| 16306 |
+
{
|
| 16307 |
+
"epoch": 0.71,
|
| 16308 |
+
"learning_rate": 1.8980090813831645e-05,
|
| 16309 |
+
"loss": 0.7985,
|
| 16310 |
+
"step": 2717
|
| 16311 |
+
},
|
| 16312 |
+
{
|
| 16313 |
+
"epoch": 0.71,
|
| 16314 |
+
"learning_rate": 1.8987076493188966e-05,
|
| 16315 |
+
"loss": 0.8134,
|
| 16316 |
+
"step": 2718
|
| 16317 |
+
},
|
| 16318 |
+
{
|
| 16319 |
+
"epoch": 0.71,
|
| 16320 |
+
"learning_rate": 1.899406217254628e-05,
|
| 16321 |
+
"loss": 0.8623,
|
| 16322 |
+
"step": 2719
|
| 16323 |
+
},
|
| 16324 |
+
{
|
| 16325 |
+
"epoch": 0.71,
|
| 16326 |
+
"learning_rate": 1.90010478519036e-05,
|
| 16327 |
+
"loss": 0.741,
|
| 16328 |
+
"step": 2720
|
| 16329 |
+
},
|
| 16330 |
+
{
|
| 16331 |
+
"epoch": 0.71,
|
| 16332 |
+
"learning_rate": 1.9008033531260916e-05,
|
| 16333 |
+
"loss": 0.8884,
|
| 16334 |
+
"step": 2721
|
| 16335 |
+
},
|
| 16336 |
+
{
|
| 16337 |
+
"epoch": 0.71,
|
| 16338 |
+
"learning_rate": 1.9015019210618234e-05,
|
| 16339 |
+
"loss": 0.9397,
|
| 16340 |
+
"step": 2722
|
| 16341 |
+
},
|
| 16342 |
+
{
|
| 16343 |
+
"epoch": 0.71,
|
| 16344 |
+
"learning_rate": 1.9022004889975552e-05,
|
| 16345 |
+
"loss": 1.0367,
|
| 16346 |
+
"step": 2723
|
| 16347 |
+
},
|
| 16348 |
+
{
|
| 16349 |
+
"epoch": 0.71,
|
| 16350 |
+
"learning_rate": 1.902899056933287e-05,
|
| 16351 |
+
"loss": 0.968,
|
| 16352 |
+
"step": 2724
|
| 16353 |
+
},
|
| 16354 |
+
{
|
| 16355 |
+
"epoch": 0.71,
|
| 16356 |
+
"learning_rate": 1.9035976248690188e-05,
|
| 16357 |
+
"loss": 0.9859,
|
| 16358 |
+
"step": 2725
|
| 16359 |
+
},
|
| 16360 |
+
{
|
| 16361 |
+
"epoch": 0.71,
|
| 16362 |
+
"learning_rate": 1.9042961928047502e-05,
|
| 16363 |
+
"loss": 0.8493,
|
| 16364 |
+
"step": 2726
|
| 16365 |
+
},
|
| 16366 |
+
{
|
| 16367 |
+
"epoch": 0.71,
|
| 16368 |
+
"learning_rate": 1.9049947607404823e-05,
|
| 16369 |
+
"loss": 0.7866,
|
| 16370 |
+
"step": 2727
|
| 16371 |
+
},
|
| 16372 |
+
{
|
| 16373 |
+
"epoch": 0.71,
|
| 16374 |
+
"learning_rate": 1.9056933286762138e-05,
|
| 16375 |
+
"loss": 0.8814,
|
| 16376 |
+
"step": 2728
|
| 16377 |
+
},
|
| 16378 |
+
{
|
| 16379 |
+
"epoch": 0.71,
|
| 16380 |
+
"learning_rate": 1.9063918966119455e-05,
|
| 16381 |
+
"loss": 0.8993,
|
| 16382 |
+
"step": 2729
|
| 16383 |
+
},
|
| 16384 |
+
{
|
| 16385 |
+
"epoch": 0.72,
|
| 16386 |
+
"learning_rate": 1.9070904645476773e-05,
|
| 16387 |
+
"loss": 0.8302,
|
| 16388 |
+
"step": 2730
|
| 16389 |
+
},
|
| 16390 |
+
{
|
| 16391 |
+
"epoch": 0.72,
|
| 16392 |
+
"learning_rate": 1.907789032483409e-05,
|
| 16393 |
+
"loss": 1.0512,
|
| 16394 |
+
"step": 2731
|
| 16395 |
+
},
|
| 16396 |
+
{
|
| 16397 |
+
"epoch": 0.72,
|
| 16398 |
+
"learning_rate": 1.908487600419141e-05,
|
| 16399 |
+
"loss": 0.8844,
|
| 16400 |
+
"step": 2732
|
| 16401 |
+
},
|
| 16402 |
+
{
|
| 16403 |
+
"epoch": 0.72,
|
| 16404 |
+
"learning_rate": 1.9091861683548727e-05,
|
| 16405 |
+
"loss": 0.7771,
|
| 16406 |
+
"step": 2733
|
| 16407 |
+
},
|
| 16408 |
+
{
|
| 16409 |
+
"epoch": 0.72,
|
| 16410 |
+
"learning_rate": 1.9098847362906045e-05,
|
| 16411 |
+
"loss": 0.9898,
|
| 16412 |
+
"step": 2734
|
| 16413 |
+
},
|
| 16414 |
+
{
|
| 16415 |
+
"epoch": 0.72,
|
| 16416 |
+
"learning_rate": 1.9105833042263362e-05,
|
| 16417 |
+
"loss": 0.9656,
|
| 16418 |
+
"step": 2735
|
| 16419 |
+
},
|
| 16420 |
+
{
|
| 16421 |
+
"epoch": 0.72,
|
| 16422 |
+
"learning_rate": 1.9112818721620677e-05,
|
| 16423 |
+
"loss": 0.7224,
|
| 16424 |
+
"step": 2736
|
| 16425 |
+
},
|
| 16426 |
+
{
|
| 16427 |
+
"epoch": 0.72,
|
| 16428 |
+
"learning_rate": 1.9119804400977998e-05,
|
| 16429 |
+
"loss": 0.955,
|
| 16430 |
+
"step": 2737
|
| 16431 |
+
},
|
| 16432 |
+
{
|
| 16433 |
+
"epoch": 0.72,
|
| 16434 |
+
"learning_rate": 1.9126790080335312e-05,
|
| 16435 |
+
"loss": 0.9459,
|
| 16436 |
+
"step": 2738
|
| 16437 |
+
},
|
| 16438 |
+
{
|
| 16439 |
+
"epoch": 0.72,
|
| 16440 |
+
"learning_rate": 1.9133775759692634e-05,
|
| 16441 |
+
"loss": 0.8382,
|
| 16442 |
+
"step": 2739
|
| 16443 |
+
},
|
| 16444 |
+
{
|
| 16445 |
+
"epoch": 0.72,
|
| 16446 |
+
"learning_rate": 1.9140761439049948e-05,
|
| 16447 |
+
"loss": 0.9502,
|
| 16448 |
+
"step": 2740
|
| 16449 |
+
},
|
| 16450 |
+
{
|
| 16451 |
+
"epoch": 0.72,
|
| 16452 |
+
"learning_rate": 1.9147747118407266e-05,
|
| 16453 |
+
"loss": 0.776,
|
| 16454 |
+
"step": 2741
|
| 16455 |
+
},
|
| 16456 |
+
{
|
| 16457 |
+
"epoch": 0.72,
|
| 16458 |
+
"learning_rate": 1.9154732797764584e-05,
|
| 16459 |
+
"loss": 0.8558,
|
| 16460 |
+
"step": 2742
|
| 16461 |
+
},
|
| 16462 |
+
{
|
| 16463 |
+
"epoch": 0.72,
|
| 16464 |
+
"learning_rate": 1.91617184771219e-05,
|
| 16465 |
+
"loss": 0.8276,
|
| 16466 |
+
"step": 2743
|
| 16467 |
+
},
|
| 16468 |
+
{
|
| 16469 |
+
"epoch": 0.72,
|
| 16470 |
+
"learning_rate": 1.916870415647922e-05,
|
| 16471 |
+
"loss": 1.0496,
|
| 16472 |
+
"step": 2744
|
| 16473 |
+
},
|
| 16474 |
+
{
|
| 16475 |
+
"epoch": 0.72,
|
| 16476 |
+
"learning_rate": 1.9175689835836537e-05,
|
| 16477 |
+
"loss": 0.9055,
|
| 16478 |
+
"step": 2745
|
| 16479 |
+
},
|
| 16480 |
+
{
|
| 16481 |
+
"epoch": 0.72,
|
| 16482 |
+
"learning_rate": 1.9182675515193855e-05,
|
| 16483 |
+
"loss": 0.9721,
|
| 16484 |
+
"step": 2746
|
| 16485 |
+
},
|
| 16486 |
+
{
|
| 16487 |
+
"epoch": 0.72,
|
| 16488 |
+
"learning_rate": 1.9189661194551173e-05,
|
| 16489 |
+
"loss": 0.726,
|
| 16490 |
+
"step": 2747
|
| 16491 |
+
},
|
| 16492 |
+
{
|
| 16493 |
+
"epoch": 0.72,
|
| 16494 |
+
"learning_rate": 1.9196646873908487e-05,
|
| 16495 |
+
"loss": 0.9177,
|
| 16496 |
+
"step": 2748
|
| 16497 |
+
},
|
| 16498 |
+
{
|
| 16499 |
+
"epoch": 0.72,
|
| 16500 |
+
"learning_rate": 1.920363255326581e-05,
|
| 16501 |
+
"loss": 0.7223,
|
| 16502 |
+
"step": 2749
|
| 16503 |
+
},
|
| 16504 |
+
{
|
| 16505 |
+
"epoch": 0.72,
|
| 16506 |
+
"learning_rate": 1.9210618232623123e-05,
|
| 16507 |
+
"loss": 0.8865,
|
| 16508 |
+
"step": 2750
|
| 16509 |
+
},
|
| 16510 |
+
{
|
| 16511 |
+
"epoch": 0.72,
|
| 16512 |
+
"learning_rate": 1.9217603911980444e-05,
|
| 16513 |
+
"loss": 0.9815,
|
| 16514 |
+
"step": 2751
|
| 16515 |
+
},
|
| 16516 |
+
{
|
| 16517 |
+
"epoch": 0.72,
|
| 16518 |
+
"learning_rate": 1.922458959133776e-05,
|
| 16519 |
+
"loss": 0.8752,
|
| 16520 |
+
"step": 2752
|
| 16521 |
+
},
|
| 16522 |
+
{
|
| 16523 |
+
"epoch": 0.72,
|
| 16524 |
+
"learning_rate": 1.9231575270695076e-05,
|
| 16525 |
+
"loss": 0.817,
|
| 16526 |
+
"step": 2753
|
| 16527 |
+
},
|
| 16528 |
+
{
|
| 16529 |
+
"epoch": 0.72,
|
| 16530 |
+
"learning_rate": 1.9238560950052394e-05,
|
| 16531 |
+
"loss": 1.1274,
|
| 16532 |
+
"step": 2754
|
| 16533 |
+
},
|
| 16534 |
+
{
|
| 16535 |
+
"epoch": 0.72,
|
| 16536 |
+
"learning_rate": 1.9245546629409712e-05,
|
| 16537 |
+
"loss": 0.9338,
|
| 16538 |
+
"step": 2755
|
| 16539 |
+
},
|
| 16540 |
+
{
|
| 16541 |
+
"epoch": 0.72,
|
| 16542 |
+
"learning_rate": 1.925253230876703e-05,
|
| 16543 |
+
"loss": 0.8909,
|
| 16544 |
+
"step": 2756
|
| 16545 |
+
},
|
| 16546 |
+
{
|
| 16547 |
+
"epoch": 0.72,
|
| 16548 |
+
"learning_rate": 1.9259517988124347e-05,
|
| 16549 |
+
"loss": 0.9714,
|
| 16550 |
+
"step": 2757
|
| 16551 |
+
},
|
| 16552 |
+
{
|
| 16553 |
+
"epoch": 0.72,
|
| 16554 |
+
"learning_rate": 1.9266503667481665e-05,
|
| 16555 |
+
"loss": 0.8739,
|
| 16556 |
+
"step": 2758
|
| 16557 |
+
},
|
| 16558 |
+
{
|
| 16559 |
+
"epoch": 0.72,
|
| 16560 |
+
"learning_rate": 1.9273489346838983e-05,
|
| 16561 |
+
"loss": 0.8507,
|
| 16562 |
+
"step": 2759
|
| 16563 |
+
},
|
| 16564 |
+
{
|
| 16565 |
+
"epoch": 0.72,
|
| 16566 |
+
"learning_rate": 1.92804750261963e-05,
|
| 16567 |
+
"loss": 0.8855,
|
| 16568 |
+
"step": 2760
|
| 16569 |
+
},
|
| 16570 |
+
{
|
| 16571 |
+
"epoch": 0.72,
|
| 16572 |
+
"learning_rate": 1.9287460705553615e-05,
|
| 16573 |
+
"loss": 0.8084,
|
| 16574 |
+
"step": 2761
|
| 16575 |
+
},
|
| 16576 |
+
{
|
| 16577 |
+
"epoch": 0.72,
|
| 16578 |
+
"learning_rate": 1.9294446384910933e-05,
|
| 16579 |
+
"loss": 0.8385,
|
| 16580 |
+
"step": 2762
|
| 16581 |
+
},
|
| 16582 |
+
{
|
| 16583 |
+
"epoch": 0.72,
|
| 16584 |
+
"learning_rate": 1.930143206426825e-05,
|
| 16585 |
+
"loss": 0.8326,
|
| 16586 |
+
"step": 2763
|
| 16587 |
+
},
|
| 16588 |
+
{
|
| 16589 |
+
"epoch": 0.72,
|
| 16590 |
+
"learning_rate": 1.930841774362557e-05,
|
| 16591 |
+
"loss": 0.724,
|
| 16592 |
+
"step": 2764
|
| 16593 |
+
},
|
| 16594 |
+
{
|
| 16595 |
+
"epoch": 0.72,
|
| 16596 |
+
"learning_rate": 1.9315403422982887e-05,
|
| 16597 |
+
"loss": 0.9018,
|
| 16598 |
+
"step": 2765
|
| 16599 |
+
},
|
| 16600 |
+
{
|
| 16601 |
+
"epoch": 0.72,
|
| 16602 |
+
"learning_rate": 1.9322389102340204e-05,
|
| 16603 |
+
"loss": 0.9134,
|
| 16604 |
+
"step": 2766
|
| 16605 |
+
},
|
| 16606 |
+
{
|
| 16607 |
+
"epoch": 0.72,
|
| 16608 |
+
"learning_rate": 1.9329374781697522e-05,
|
| 16609 |
+
"loss": 0.8337,
|
| 16610 |
+
"step": 2767
|
| 16611 |
+
},
|
| 16612 |
+
{
|
| 16613 |
+
"epoch": 0.73,
|
| 16614 |
+
"learning_rate": 1.933636046105484e-05,
|
| 16615 |
+
"loss": 0.8756,
|
| 16616 |
+
"step": 2768
|
| 16617 |
+
},
|
| 16618 |
+
{
|
| 16619 |
+
"epoch": 0.73,
|
| 16620 |
+
"learning_rate": 1.9343346140412154e-05,
|
| 16621 |
+
"loss": 0.854,
|
| 16622 |
+
"step": 2769
|
| 16623 |
+
},
|
| 16624 |
+
{
|
| 16625 |
+
"epoch": 0.73,
|
| 16626 |
+
"learning_rate": 1.9350331819769476e-05,
|
| 16627 |
+
"loss": 0.8368,
|
| 16628 |
+
"step": 2770
|
| 16629 |
+
},
|
| 16630 |
+
{
|
| 16631 |
+
"epoch": 0.73,
|
| 16632 |
+
"learning_rate": 1.935731749912679e-05,
|
| 16633 |
+
"loss": 0.7307,
|
| 16634 |
+
"step": 2771
|
| 16635 |
+
},
|
| 16636 |
+
{
|
| 16637 |
+
"epoch": 0.73,
|
| 16638 |
+
"learning_rate": 1.936430317848411e-05,
|
| 16639 |
+
"loss": 1.0501,
|
| 16640 |
+
"step": 2772
|
| 16641 |
+
},
|
| 16642 |
+
{
|
| 16643 |
+
"epoch": 0.73,
|
| 16644 |
+
"learning_rate": 1.9371288857841426e-05,
|
| 16645 |
+
"loss": 0.8566,
|
| 16646 |
+
"step": 2773
|
| 16647 |
+
},
|
| 16648 |
+
{
|
| 16649 |
+
"epoch": 0.73,
|
| 16650 |
+
"learning_rate": 1.9378274537198743e-05,
|
| 16651 |
+
"loss": 0.8642,
|
| 16652 |
+
"step": 2774
|
| 16653 |
+
},
|
| 16654 |
+
{
|
| 16655 |
+
"epoch": 0.73,
|
| 16656 |
+
"learning_rate": 1.938526021655606e-05,
|
| 16657 |
+
"loss": 0.7805,
|
| 16658 |
+
"step": 2775
|
| 16659 |
+
},
|
| 16660 |
+
{
|
| 16661 |
+
"epoch": 0.73,
|
| 16662 |
+
"learning_rate": 1.939224589591338e-05,
|
| 16663 |
+
"loss": 0.9875,
|
| 16664 |
+
"step": 2776
|
| 16665 |
+
},
|
| 16666 |
+
{
|
| 16667 |
+
"epoch": 0.73,
|
| 16668 |
+
"learning_rate": 1.9399231575270697e-05,
|
| 16669 |
+
"loss": 0.6957,
|
| 16670 |
+
"step": 2777
|
| 16671 |
+
},
|
| 16672 |
+
{
|
| 16673 |
+
"epoch": 0.73,
|
| 16674 |
+
"learning_rate": 1.9406217254628015e-05,
|
| 16675 |
+
"loss": 1.2189,
|
| 16676 |
+
"step": 2778
|
| 16677 |
+
},
|
| 16678 |
+
{
|
| 16679 |
+
"epoch": 0.73,
|
| 16680 |
+
"learning_rate": 1.9413202933985333e-05,
|
| 16681 |
+
"loss": 0.9422,
|
| 16682 |
+
"step": 2779
|
| 16683 |
+
},
|
| 16684 |
+
{
|
| 16685 |
+
"epoch": 0.73,
|
| 16686 |
+
"learning_rate": 1.942018861334265e-05,
|
| 16687 |
+
"loss": 0.7191,
|
| 16688 |
+
"step": 2780
|
| 16689 |
+
},
|
| 16690 |
+
{
|
| 16691 |
+
"epoch": 0.73,
|
| 16692 |
+
"learning_rate": 1.9427174292699965e-05,
|
| 16693 |
+
"loss": 0.6201,
|
| 16694 |
+
"step": 2781
|
| 16695 |
+
},
|
| 16696 |
+
{
|
| 16697 |
+
"epoch": 0.73,
|
| 16698 |
+
"learning_rate": 1.9434159972057286e-05,
|
| 16699 |
+
"loss": 0.8625,
|
| 16700 |
+
"step": 2782
|
| 16701 |
+
},
|
| 16702 |
+
{
|
| 16703 |
+
"epoch": 0.73,
|
| 16704 |
+
"learning_rate": 1.94411456514146e-05,
|
| 16705 |
+
"loss": 0.9498,
|
| 16706 |
+
"step": 2783
|
| 16707 |
+
},
|
| 16708 |
+
{
|
| 16709 |
+
"epoch": 0.73,
|
| 16710 |
+
"learning_rate": 1.944813133077192e-05,
|
| 16711 |
+
"loss": 0.9871,
|
| 16712 |
+
"step": 2784
|
| 16713 |
+
},
|
| 16714 |
+
{
|
| 16715 |
+
"epoch": 0.73,
|
| 16716 |
+
"learning_rate": 1.9455117010129236e-05,
|
| 16717 |
+
"loss": 1.0324,
|
| 16718 |
+
"step": 2785
|
| 16719 |
+
},
|
| 16720 |
+
{
|
| 16721 |
+
"epoch": 0.73,
|
| 16722 |
+
"learning_rate": 1.9462102689486554e-05,
|
| 16723 |
+
"loss": 0.8161,
|
| 16724 |
+
"step": 2786
|
| 16725 |
+
},
|
| 16726 |
+
{
|
| 16727 |
+
"epoch": 0.73,
|
| 16728 |
+
"learning_rate": 1.946908836884387e-05,
|
| 16729 |
+
"loss": 1.0342,
|
| 16730 |
+
"step": 2787
|
| 16731 |
+
},
|
| 16732 |
+
{
|
| 16733 |
+
"epoch": 0.73,
|
| 16734 |
+
"learning_rate": 1.947607404820119e-05,
|
| 16735 |
+
"loss": 1.0001,
|
| 16736 |
+
"step": 2788
|
| 16737 |
+
},
|
| 16738 |
+
{
|
| 16739 |
+
"epoch": 0.73,
|
| 16740 |
+
"learning_rate": 1.9483059727558507e-05,
|
| 16741 |
+
"loss": 0.7979,
|
| 16742 |
+
"step": 2789
|
| 16743 |
+
},
|
| 16744 |
+
{
|
| 16745 |
+
"epoch": 0.73,
|
| 16746 |
+
"learning_rate": 1.9490045406915825e-05,
|
| 16747 |
+
"loss": 0.9713,
|
| 16748 |
+
"step": 2790
|
| 16749 |
+
},
|
| 16750 |
+
{
|
| 16751 |
+
"epoch": 0.73,
|
| 16752 |
+
"learning_rate": 1.9497031086273143e-05,
|
| 16753 |
+
"loss": 0.9352,
|
| 16754 |
+
"step": 2791
|
| 16755 |
+
},
|
| 16756 |
+
{
|
| 16757 |
+
"epoch": 0.73,
|
| 16758 |
+
"learning_rate": 1.950401676563046e-05,
|
| 16759 |
+
"loss": 0.6547,
|
| 16760 |
+
"step": 2792
|
| 16761 |
+
},
|
| 16762 |
+
{
|
| 16763 |
+
"epoch": 0.73,
|
| 16764 |
+
"learning_rate": 1.9511002444987775e-05,
|
| 16765 |
+
"loss": 0.7805,
|
| 16766 |
+
"step": 2793
|
| 16767 |
+
},
|
| 16768 |
+
{
|
| 16769 |
+
"epoch": 0.73,
|
| 16770 |
+
"learning_rate": 1.9517988124345096e-05,
|
| 16771 |
+
"loss": 0.982,
|
| 16772 |
+
"step": 2794
|
| 16773 |
+
},
|
| 16774 |
+
{
|
| 16775 |
+
"epoch": 0.73,
|
| 16776 |
+
"learning_rate": 1.952497380370241e-05,
|
| 16777 |
+
"loss": 0.7602,
|
| 16778 |
+
"step": 2795
|
| 16779 |
+
},
|
| 16780 |
+
{
|
| 16781 |
+
"epoch": 0.73,
|
| 16782 |
+
"learning_rate": 1.953195948305973e-05,
|
| 16783 |
+
"loss": 0.8852,
|
| 16784 |
+
"step": 2796
|
| 16785 |
+
},
|
| 16786 |
+
{
|
| 16787 |
+
"epoch": 0.73,
|
| 16788 |
+
"learning_rate": 1.9538945162417046e-05,
|
| 16789 |
+
"loss": 1.0706,
|
| 16790 |
+
"step": 2797
|
| 16791 |
+
},
|
| 16792 |
+
{
|
| 16793 |
+
"epoch": 0.73,
|
| 16794 |
+
"learning_rate": 1.9545930841774364e-05,
|
| 16795 |
+
"loss": 0.9424,
|
| 16796 |
+
"step": 2798
|
| 16797 |
+
},
|
| 16798 |
+
{
|
| 16799 |
+
"epoch": 0.73,
|
| 16800 |
+
"learning_rate": 1.9552916521131682e-05,
|
| 16801 |
+
"loss": 0.9256,
|
| 16802 |
+
"step": 2799
|
| 16803 |
+
},
|
| 16804 |
+
{
|
| 16805 |
+
"epoch": 0.73,
|
| 16806 |
+
"learning_rate": 1.9559902200489e-05,
|
| 16807 |
+
"loss": 0.8106,
|
| 16808 |
+
"step": 2800
|
| 16809 |
+
},
|
| 16810 |
+
{
|
| 16811 |
+
"epoch": 0.73,
|
| 16812 |
+
"learning_rate": 1.9566887879846318e-05,
|
| 16813 |
+
"loss": 0.886,
|
| 16814 |
+
"step": 2801
|
| 16815 |
+
},
|
| 16816 |
+
{
|
| 16817 |
+
"epoch": 0.73,
|
| 16818 |
+
"learning_rate": 1.9573873559203632e-05,
|
| 16819 |
+
"loss": 0.8086,
|
| 16820 |
+
"step": 2802
|
| 16821 |
+
},
|
| 16822 |
+
{
|
| 16823 |
+
"epoch": 0.73,
|
| 16824 |
+
"learning_rate": 1.9580859238560953e-05,
|
| 16825 |
+
"loss": 0.9044,
|
| 16826 |
+
"step": 2803
|
| 16827 |
+
},
|
| 16828 |
+
{
|
| 16829 |
+
"epoch": 0.73,
|
| 16830 |
+
"learning_rate": 1.9587844917918268e-05,
|
| 16831 |
+
"loss": 0.8787,
|
| 16832 |
+
"step": 2804
|
| 16833 |
+
},
|
| 16834 |
+
{
|
| 16835 |
+
"epoch": 0.73,
|
| 16836 |
+
"learning_rate": 1.9594830597275585e-05,
|
| 16837 |
+
"loss": 0.9154,
|
| 16838 |
+
"step": 2805
|
| 16839 |
+
},
|
| 16840 |
+
{
|
| 16841 |
+
"epoch": 0.74,
|
| 16842 |
+
"learning_rate": 1.9601816276632903e-05,
|
| 16843 |
+
"loss": 0.9251,
|
| 16844 |
+
"step": 2806
|
| 16845 |
+
},
|
| 16846 |
+
{
|
| 16847 |
+
"epoch": 0.74,
|
| 16848 |
+
"learning_rate": 1.960880195599022e-05,
|
| 16849 |
+
"loss": 0.7256,
|
| 16850 |
+
"step": 2807
|
| 16851 |
+
},
|
| 16852 |
+
{
|
| 16853 |
+
"epoch": 0.74,
|
| 16854 |
+
"learning_rate": 1.961578763534754e-05,
|
| 16855 |
+
"loss": 0.8306,
|
| 16856 |
+
"step": 2808
|
| 16857 |
+
},
|
| 16858 |
+
{
|
| 16859 |
+
"epoch": 0.74,
|
| 16860 |
+
"learning_rate": 1.9622773314704857e-05,
|
| 16861 |
+
"loss": 0.9783,
|
| 16862 |
+
"step": 2809
|
| 16863 |
+
},
|
| 16864 |
+
{
|
| 16865 |
+
"epoch": 0.74,
|
| 16866 |
+
"learning_rate": 1.9629758994062175e-05,
|
| 16867 |
+
"loss": 0.8982,
|
| 16868 |
+
"step": 2810
|
| 16869 |
+
},
|
| 16870 |
+
{
|
| 16871 |
+
"epoch": 0.74,
|
| 16872 |
+
"learning_rate": 1.9636744673419492e-05,
|
| 16873 |
+
"loss": 0.8845,
|
| 16874 |
+
"step": 2811
|
| 16875 |
+
},
|
| 16876 |
+
{
|
| 16877 |
+
"epoch": 0.74,
|
| 16878 |
+
"learning_rate": 1.964373035277681e-05,
|
| 16879 |
+
"loss": 0.8706,
|
| 16880 |
+
"step": 2812
|
| 16881 |
+
},
|
| 16882 |
+
{
|
| 16883 |
+
"epoch": 0.74,
|
| 16884 |
+
"learning_rate": 1.9650716032134128e-05,
|
| 16885 |
+
"loss": 0.7391,
|
| 16886 |
+
"step": 2813
|
| 16887 |
+
},
|
| 16888 |
+
{
|
| 16889 |
+
"epoch": 0.74,
|
| 16890 |
+
"learning_rate": 1.9657701711491442e-05,
|
| 16891 |
+
"loss": 0.7833,
|
| 16892 |
+
"step": 2814
|
| 16893 |
+
},
|
| 16894 |
+
{
|
| 16895 |
+
"epoch": 0.74,
|
| 16896 |
+
"learning_rate": 1.9664687390848764e-05,
|
| 16897 |
+
"loss": 0.7923,
|
| 16898 |
+
"step": 2815
|
| 16899 |
+
},
|
| 16900 |
+
{
|
| 16901 |
+
"epoch": 0.74,
|
| 16902 |
+
"learning_rate": 1.9671673070206078e-05,
|
| 16903 |
+
"loss": 1.1248,
|
| 16904 |
+
"step": 2816
|
| 16905 |
+
},
|
| 16906 |
+
{
|
| 16907 |
+
"epoch": 0.74,
|
| 16908 |
+
"learning_rate": 1.9678658749563396e-05,
|
| 16909 |
+
"loss": 0.7786,
|
| 16910 |
+
"step": 2817
|
| 16911 |
+
},
|
| 16912 |
+
{
|
| 16913 |
+
"epoch": 0.74,
|
| 16914 |
+
"learning_rate": 1.9685644428920714e-05,
|
| 16915 |
+
"loss": 0.9508,
|
| 16916 |
+
"step": 2818
|
| 16917 |
+
},
|
| 16918 |
+
{
|
| 16919 |
+
"epoch": 0.74,
|
| 16920 |
+
"learning_rate": 1.969263010827803e-05,
|
| 16921 |
+
"loss": 0.9588,
|
| 16922 |
+
"step": 2819
|
| 16923 |
+
},
|
| 16924 |
+
{
|
| 16925 |
+
"epoch": 0.74,
|
| 16926 |
+
"learning_rate": 1.969961578763535e-05,
|
| 16927 |
+
"loss": 0.9941,
|
| 16928 |
+
"step": 2820
|
| 16929 |
+
},
|
| 16930 |
+
{
|
| 16931 |
+
"epoch": 0.74,
|
| 16932 |
+
"learning_rate": 1.9706601466992667e-05,
|
| 16933 |
+
"loss": 0.7582,
|
| 16934 |
+
"step": 2821
|
| 16935 |
+
},
|
| 16936 |
+
{
|
| 16937 |
+
"epoch": 0.74,
|
| 16938 |
+
"learning_rate": 1.9713587146349985e-05,
|
| 16939 |
+
"loss": 0.8907,
|
| 16940 |
+
"step": 2822
|
| 16941 |
+
},
|
| 16942 |
+
{
|
| 16943 |
+
"epoch": 0.74,
|
| 16944 |
+
"learning_rate": 1.9720572825707303e-05,
|
| 16945 |
+
"loss": 0.8466,
|
| 16946 |
+
"step": 2823
|
| 16947 |
+
},
|
| 16948 |
+
{
|
| 16949 |
+
"epoch": 0.74,
|
| 16950 |
+
"learning_rate": 1.972755850506462e-05,
|
| 16951 |
+
"loss": 0.7789,
|
| 16952 |
+
"step": 2824
|
| 16953 |
+
},
|
| 16954 |
+
{
|
| 16955 |
+
"epoch": 0.74,
|
| 16956 |
+
"learning_rate": 1.973454418442194e-05,
|
| 16957 |
+
"loss": 0.7568,
|
| 16958 |
+
"step": 2825
|
| 16959 |
+
},
|
| 16960 |
+
{
|
| 16961 |
+
"epoch": 0.74,
|
| 16962 |
+
"learning_rate": 1.9741529863779253e-05,
|
| 16963 |
+
"loss": 0.9138,
|
| 16964 |
+
"step": 2826
|
| 16965 |
+
},
|
| 16966 |
+
{
|
| 16967 |
+
"epoch": 0.74,
|
| 16968 |
+
"learning_rate": 1.9748515543136574e-05,
|
| 16969 |
+
"loss": 0.8781,
|
| 16970 |
+
"step": 2827
|
| 16971 |
+
},
|
| 16972 |
+
{
|
| 16973 |
+
"epoch": 0.74,
|
| 16974 |
+
"learning_rate": 1.975550122249389e-05,
|
| 16975 |
+
"loss": 0.8798,
|
| 16976 |
+
"step": 2828
|
| 16977 |
+
},
|
| 16978 |
+
{
|
| 16979 |
+
"epoch": 0.74,
|
| 16980 |
+
"learning_rate": 1.9762486901851206e-05,
|
| 16981 |
+
"loss": 0.6543,
|
| 16982 |
+
"step": 2829
|
| 16983 |
+
},
|
| 16984 |
+
{
|
| 16985 |
+
"epoch": 0.74,
|
| 16986 |
+
"learning_rate": 1.9769472581208524e-05,
|
| 16987 |
+
"loss": 0.7894,
|
| 16988 |
+
"step": 2830
|
| 16989 |
+
},
|
| 16990 |
+
{
|
| 16991 |
+
"epoch": 0.74,
|
| 16992 |
+
"learning_rate": 1.9776458260565842e-05,
|
| 16993 |
+
"loss": 0.73,
|
| 16994 |
+
"step": 2831
|
| 16995 |
+
},
|
| 16996 |
+
{
|
| 16997 |
+
"epoch": 0.74,
|
| 16998 |
+
"learning_rate": 1.978344393992316e-05,
|
| 16999 |
+
"loss": 1.0953,
|
| 17000 |
+
"step": 2832
|
| 17001 |
+
},
|
| 17002 |
+
{
|
| 17003 |
+
"epoch": 0.74,
|
| 17004 |
+
"learning_rate": 1.9790429619280474e-05,
|
| 17005 |
+
"loss": 0.8592,
|
| 17006 |
+
"step": 2833
|
| 17007 |
+
},
|
| 17008 |
+
{
|
| 17009 |
+
"epoch": 0.74,
|
| 17010 |
+
"learning_rate": 1.9797415298637795e-05,
|
| 17011 |
+
"loss": 0.888,
|
| 17012 |
+
"step": 2834
|
| 17013 |
+
},
|
| 17014 |
+
{
|
| 17015 |
+
"epoch": 0.74,
|
| 17016 |
+
"learning_rate": 1.980440097799511e-05,
|
| 17017 |
+
"loss": 0.8069,
|
| 17018 |
+
"step": 2835
|
| 17019 |
+
},
|
| 17020 |
+
{
|
| 17021 |
+
"epoch": 0.74,
|
| 17022 |
+
"learning_rate": 1.981138665735243e-05,
|
| 17023 |
+
"loss": 0.8407,
|
| 17024 |
+
"step": 2836
|
| 17025 |
+
},
|
| 17026 |
+
{
|
| 17027 |
+
"epoch": 0.74,
|
| 17028 |
+
"learning_rate": 1.9818372336709745e-05,
|
| 17029 |
+
"loss": 0.9071,
|
| 17030 |
+
"step": 2837
|
| 17031 |
+
},
|
| 17032 |
+
{
|
| 17033 |
+
"epoch": 0.74,
|
| 17034 |
+
"learning_rate": 1.9825358016067063e-05,
|
| 17035 |
+
"loss": 0.7468,
|
| 17036 |
+
"step": 2838
|
| 17037 |
+
},
|
| 17038 |
+
{
|
| 17039 |
+
"epoch": 0.74,
|
| 17040 |
+
"learning_rate": 1.983234369542438e-05,
|
| 17041 |
+
"loss": 0.7496,
|
| 17042 |
+
"step": 2839
|
| 17043 |
+
},
|
| 17044 |
+
{
|
| 17045 |
+
"epoch": 0.74,
|
| 17046 |
+
"learning_rate": 1.98393293747817e-05,
|
| 17047 |
+
"loss": 0.8522,
|
| 17048 |
+
"step": 2840
|
| 17049 |
+
},
|
| 17050 |
+
{
|
| 17051 |
+
"epoch": 0.74,
|
| 17052 |
+
"learning_rate": 1.9846315054139017e-05,
|
| 17053 |
+
"loss": 0.9676,
|
| 17054 |
+
"step": 2841
|
| 17055 |
+
},
|
| 17056 |
+
{
|
| 17057 |
+
"epoch": 0.74,
|
| 17058 |
+
"learning_rate": 1.9853300733496334e-05,
|
| 17059 |
+
"loss": 0.9409,
|
| 17060 |
+
"step": 2842
|
| 17061 |
+
},
|
| 17062 |
+
{
|
| 17063 |
+
"epoch": 0.74,
|
| 17064 |
+
"learning_rate": 1.9860286412853652e-05,
|
| 17065 |
+
"loss": 0.9343,
|
| 17066 |
+
"step": 2843
|
| 17067 |
+
},
|
| 17068 |
+
{
|
| 17069 |
+
"epoch": 0.74,
|
| 17070 |
+
"learning_rate": 1.986727209221097e-05,
|
| 17071 |
+
"loss": 0.9985,
|
| 17072 |
+
"step": 2844
|
| 17073 |
+
},
|
| 17074 |
+
{
|
| 17075 |
+
"epoch": 0.75,
|
| 17076 |
+
"learning_rate": 1.9874257771568284e-05,
|
| 17077 |
+
"loss": 0.7798,
|
| 17078 |
+
"step": 2845
|
| 17079 |
+
},
|
| 17080 |
+
{
|
| 17081 |
+
"epoch": 0.75,
|
| 17082 |
+
"learning_rate": 1.9881243450925606e-05,
|
| 17083 |
+
"loss": 0.788,
|
| 17084 |
+
"step": 2846
|
| 17085 |
+
},
|
| 17086 |
+
{
|
| 17087 |
+
"epoch": 0.75,
|
| 17088 |
+
"learning_rate": 1.988822913028292e-05,
|
| 17089 |
+
"loss": 0.8491,
|
| 17090 |
+
"step": 2847
|
| 17091 |
+
},
|
| 17092 |
+
{
|
| 17093 |
+
"epoch": 0.75,
|
| 17094 |
+
"learning_rate": 1.989521480964024e-05,
|
| 17095 |
+
"loss": 0.8453,
|
| 17096 |
+
"step": 2848
|
| 17097 |
+
},
|
| 17098 |
+
{
|
| 17099 |
+
"epoch": 0.75,
|
| 17100 |
+
"learning_rate": 1.9902200488997556e-05,
|
| 17101 |
+
"loss": 0.9333,
|
| 17102 |
+
"step": 2849
|
| 17103 |
+
},
|
| 17104 |
+
{
|
| 17105 |
+
"epoch": 0.75,
|
| 17106 |
+
"learning_rate": 1.9909186168354873e-05,
|
| 17107 |
+
"loss": 0.8421,
|
| 17108 |
+
"step": 2850
|
| 17109 |
+
},
|
| 17110 |
+
{
|
| 17111 |
+
"epoch": 0.75,
|
| 17112 |
+
"learning_rate": 1.991617184771219e-05,
|
| 17113 |
+
"loss": 0.8588,
|
| 17114 |
+
"step": 2851
|
| 17115 |
+
},
|
| 17116 |
+
{
|
| 17117 |
+
"epoch": 0.75,
|
| 17118 |
+
"learning_rate": 1.992315752706951e-05,
|
| 17119 |
+
"loss": 1.253,
|
| 17120 |
+
"step": 2852
|
| 17121 |
+
},
|
| 17122 |
+
{
|
| 17123 |
+
"epoch": 0.75,
|
| 17124 |
+
"learning_rate": 1.9930143206426827e-05,
|
| 17125 |
+
"loss": 0.8091,
|
| 17126 |
+
"step": 2853
|
| 17127 |
+
},
|
| 17128 |
+
{
|
| 17129 |
+
"epoch": 0.75,
|
| 17130 |
+
"learning_rate": 1.9937128885784145e-05,
|
| 17131 |
+
"loss": 0.79,
|
| 17132 |
+
"step": 2854
|
| 17133 |
+
},
|
| 17134 |
+
{
|
| 17135 |
+
"epoch": 0.75,
|
| 17136 |
+
"learning_rate": 1.9944114565141463e-05,
|
| 17137 |
+
"loss": 0.7981,
|
| 17138 |
+
"step": 2855
|
| 17139 |
+
},
|
| 17140 |
+
{
|
| 17141 |
+
"epoch": 0.75,
|
| 17142 |
+
"learning_rate": 1.995110024449878e-05,
|
| 17143 |
+
"loss": 0.851,
|
| 17144 |
+
"step": 2856
|
| 17145 |
+
},
|
| 17146 |
+
{
|
| 17147 |
+
"epoch": 0.75,
|
| 17148 |
+
"learning_rate": 1.9958085923856095e-05,
|
| 17149 |
+
"loss": 0.7314,
|
| 17150 |
+
"step": 2857
|
| 17151 |
+
},
|
| 17152 |
+
{
|
| 17153 |
+
"epoch": 0.75,
|
| 17154 |
+
"learning_rate": 1.9965071603213416e-05,
|
| 17155 |
+
"loss": 0.8802,
|
| 17156 |
+
"step": 2858
|
| 17157 |
+
},
|
| 17158 |
+
{
|
| 17159 |
+
"epoch": 0.75,
|
| 17160 |
+
"learning_rate": 1.997205728257073e-05,
|
| 17161 |
+
"loss": 0.9579,
|
| 17162 |
+
"step": 2859
|
| 17163 |
+
},
|
| 17164 |
+
{
|
| 17165 |
+
"epoch": 0.75,
|
| 17166 |
+
"learning_rate": 1.997904296192805e-05,
|
| 17167 |
+
"loss": 0.8267,
|
| 17168 |
+
"step": 2860
|
| 17169 |
+
},
|
| 17170 |
+
{
|
| 17171 |
+
"epoch": 0.75,
|
| 17172 |
+
"learning_rate": 1.9986028641285366e-05,
|
| 17173 |
+
"loss": 0.8196,
|
| 17174 |
+
"step": 2861
|
| 17175 |
+
},
|
| 17176 |
+
{
|
| 17177 |
+
"epoch": 0.75,
|
| 17178 |
+
"learning_rate": 1.9993014320642684e-05,
|
| 17179 |
+
"loss": 0.9573,
|
| 17180 |
+
"step": 2862
|
| 17181 |
+
},
|
| 17182 |
+
{
|
| 17183 |
+
"epoch": 0.75,
|
| 17184 |
+
"learning_rate": 2e-05,
|
| 17185 |
+
"loss": 0.8303,
|
| 17186 |
+
"step": 2863
|
| 17187 |
+
},
|
| 17188 |
+
{
|
| 17189 |
+
"epoch": 0.75,
|
| 17190 |
+
"learning_rate": 1.9999999994240245e-05,
|
| 17191 |
+
"loss": 0.9287,
|
| 17192 |
+
"step": 2864
|
| 17193 |
+
},
|
| 17194 |
+
{
|
| 17195 |
+
"epoch": 0.75,
|
| 17196 |
+
"learning_rate": 1.9999999976960966e-05,
|
| 17197 |
+
"loss": 0.9562,
|
| 17198 |
+
"step": 2865
|
| 17199 |
+
},
|
| 17200 |
+
{
|
| 17201 |
+
"epoch": 0.75,
|
| 17202 |
+
"learning_rate": 1.9999999948162175e-05,
|
| 17203 |
+
"loss": 0.7653,
|
| 17204 |
+
"step": 2866
|
| 17205 |
+
},
|
| 17206 |
+
{
|
| 17207 |
+
"epoch": 0.75,
|
| 17208 |
+
"learning_rate": 1.999999990784386e-05,
|
| 17209 |
+
"loss": 0.7886,
|
| 17210 |
+
"step": 2867
|
| 17211 |
+
},
|
| 17212 |
+
{
|
| 17213 |
+
"epoch": 0.75,
|
| 17214 |
+
"learning_rate": 1.9999999856006034e-05,
|
| 17215 |
+
"loss": 0.9139,
|
| 17216 |
+
"step": 2868
|
| 17217 |
+
},
|
| 17218 |
+
{
|
| 17219 |
+
"epoch": 0.75,
|
| 17220 |
+
"learning_rate": 1.9999999792648688e-05,
|
| 17221 |
+
"loss": 0.9877,
|
| 17222 |
+
"step": 2869
|
| 17223 |
+
},
|
| 17224 |
+
{
|
| 17225 |
+
"epoch": 0.75,
|
| 17226 |
+
"learning_rate": 1.999999971777183e-05,
|
| 17227 |
+
"loss": 0.9159,
|
| 17228 |
+
"step": 2870
|
| 17229 |
+
},
|
| 17230 |
+
{
|
| 17231 |
+
"epoch": 0.75,
|
| 17232 |
+
"learning_rate": 1.9999999631375447e-05,
|
| 17233 |
+
"loss": 1.0662,
|
| 17234 |
+
"step": 2871
|
| 17235 |
+
},
|
| 17236 |
+
{
|
| 17237 |
+
"epoch": 0.75,
|
| 17238 |
+
"learning_rate": 1.999999953345955e-05,
|
| 17239 |
+
"loss": 1.0571,
|
| 17240 |
+
"step": 2872
|
| 17241 |
+
},
|
| 17242 |
+
{
|
| 17243 |
+
"epoch": 0.75,
|
| 17244 |
+
"learning_rate": 1.9999999424024137e-05,
|
| 17245 |
+
"loss": 0.9049,
|
| 17246 |
+
"step": 2873
|
| 17247 |
+
},
|
| 17248 |
+
{
|
| 17249 |
+
"epoch": 0.75,
|
| 17250 |
+
"learning_rate": 1.9999999303069207e-05,
|
| 17251 |
+
"loss": 1.0672,
|
| 17252 |
+
"step": 2874
|
| 17253 |
+
},
|
| 17254 |
+
{
|
| 17255 |
+
"epoch": 0.75,
|
| 17256 |
+
"learning_rate": 1.999999917059476e-05,
|
| 17257 |
+
"loss": 0.9257,
|
| 17258 |
+
"step": 2875
|
| 17259 |
+
},
|
| 17260 |
+
{
|
| 17261 |
+
"epoch": 0.75,
|
| 17262 |
+
"learning_rate": 1.9999999026600798e-05,
|
| 17263 |
+
"loss": 0.8201,
|
| 17264 |
+
"step": 2876
|
| 17265 |
+
},
|
| 17266 |
+
{
|
| 17267 |
+
"epoch": 0.75,
|
| 17268 |
+
"learning_rate": 1.9999998871087317e-05,
|
| 17269 |
+
"loss": 0.6845,
|
| 17270 |
+
"step": 2877
|
| 17271 |
+
},
|
| 17272 |
+
{
|
| 17273 |
+
"epoch": 0.75,
|
| 17274 |
+
"learning_rate": 1.999999870405432e-05,
|
| 17275 |
+
"loss": 0.8959,
|
| 17276 |
+
"step": 2878
|
| 17277 |
+
},
|
| 17278 |
+
{
|
| 17279 |
+
"epoch": 0.75,
|
| 17280 |
+
"learning_rate": 1.9999998525501808e-05,
|
| 17281 |
+
"loss": 0.8993,
|
| 17282 |
+
"step": 2879
|
| 17283 |
+
},
|
| 17284 |
+
{
|
| 17285 |
+
"epoch": 0.75,
|
| 17286 |
+
"learning_rate": 1.9999998335429783e-05,
|
| 17287 |
+
"loss": 0.7234,
|
| 17288 |
+
"step": 2880
|
| 17289 |
+
},
|
| 17290 |
+
{
|
| 17291 |
+
"epoch": 0.75,
|
| 17292 |
+
"learning_rate": 1.999999813383824e-05,
|
| 17293 |
+
"loss": 1.0317,
|
| 17294 |
+
"step": 2881
|
| 17295 |
+
},
|
| 17296 |
+
{
|
| 17297 |
+
"epoch": 0.75,
|
| 17298 |
+
"learning_rate": 1.9999997920727182e-05,
|
| 17299 |
+
"loss": 0.8622,
|
| 17300 |
+
"step": 2882
|
| 17301 |
+
},
|
| 17302 |
+
{
|
| 17303 |
+
"epoch": 0.76,
|
| 17304 |
+
"learning_rate": 1.999999769609661e-05,
|
| 17305 |
+
"loss": 0.6794,
|
| 17306 |
+
"step": 2883
|
| 17307 |
+
},
|
| 17308 |
+
{
|
| 17309 |
+
"epoch": 0.76,
|
| 17310 |
+
"learning_rate": 1.9999997459946517e-05,
|
| 17311 |
+
"loss": 0.846,
|
| 17312 |
+
"step": 2884
|
| 17313 |
+
},
|
| 17314 |
+
{
|
| 17315 |
+
"epoch": 0.76,
|
| 17316 |
+
"learning_rate": 1.999999721227692e-05,
|
| 17317 |
+
"loss": 0.7269,
|
| 17318 |
+
"step": 2885
|
| 17319 |
+
},
|
| 17320 |
+
{
|
| 17321 |
+
"epoch": 0.76,
|
| 17322 |
+
"learning_rate": 1.99999969530878e-05,
|
| 17323 |
+
"loss": 0.8606,
|
| 17324 |
+
"step": 2886
|
| 17325 |
+
},
|
| 17326 |
+
{
|
| 17327 |
+
"epoch": 0.76,
|
| 17328 |
+
"learning_rate": 1.999999668237917e-05,
|
| 17329 |
+
"loss": 1.0684,
|
| 17330 |
+
"step": 2887
|
| 17331 |
+
},
|
| 17332 |
+
{
|
| 17333 |
+
"epoch": 0.76,
|
| 17334 |
+
"learning_rate": 1.9999996400151026e-05,
|
| 17335 |
+
"loss": 1.0371,
|
| 17336 |
+
"step": 2888
|
| 17337 |
+
},
|
| 17338 |
+
{
|
| 17339 |
+
"epoch": 0.76,
|
| 17340 |
+
"learning_rate": 1.999999610640337e-05,
|
| 17341 |
+
"loss": 0.8625,
|
| 17342 |
+
"step": 2889
|
| 17343 |
+
},
|
| 17344 |
+
{
|
| 17345 |
+
"epoch": 0.76,
|
| 17346 |
+
"learning_rate": 1.9999995801136197e-05,
|
| 17347 |
+
"loss": 0.8155,
|
| 17348 |
+
"step": 2890
|
| 17349 |
+
},
|
| 17350 |
+
{
|
| 17351 |
+
"epoch": 0.76,
|
| 17352 |
+
"learning_rate": 1.9999995484349517e-05,
|
| 17353 |
+
"loss": 0.9766,
|
| 17354 |
+
"step": 2891
|
| 17355 |
+
},
|
| 17356 |
+
{
|
| 17357 |
+
"epoch": 0.76,
|
| 17358 |
+
"learning_rate": 1.999999515604332e-05,
|
| 17359 |
+
"loss": 0.9301,
|
| 17360 |
+
"step": 2892
|
| 17361 |
+
},
|
| 17362 |
+
{
|
| 17363 |
+
"epoch": 0.76,
|
| 17364 |
+
"learning_rate": 1.9999994816217613e-05,
|
| 17365 |
+
"loss": 0.8873,
|
| 17366 |
+
"step": 2893
|
| 17367 |
+
},
|
| 17368 |
+
{
|
| 17369 |
+
"epoch": 0.76,
|
| 17370 |
+
"learning_rate": 1.9999994464872395e-05,
|
| 17371 |
+
"loss": 1.0211,
|
| 17372 |
+
"step": 2894
|
| 17373 |
+
},
|
| 17374 |
+
{
|
| 17375 |
+
"epoch": 0.76,
|
| 17376 |
+
"learning_rate": 1.9999994102007668e-05,
|
| 17377 |
+
"loss": 1.0326,
|
| 17378 |
+
"step": 2895
|
| 17379 |
+
},
|
| 17380 |
+
{
|
| 17381 |
+
"epoch": 0.76,
|
| 17382 |
+
"learning_rate": 1.9999993727623425e-05,
|
| 17383 |
+
"loss": 0.9601,
|
| 17384 |
+
"step": 2896
|
| 17385 |
+
},
|
| 17386 |
+
{
|
| 17387 |
+
"epoch": 0.76,
|
| 17388 |
+
"learning_rate": 1.9999993341719673e-05,
|
| 17389 |
+
"loss": 0.8871,
|
| 17390 |
+
"step": 2897
|
| 17391 |
+
},
|
| 17392 |
+
{
|
| 17393 |
+
"epoch": 0.76,
|
| 17394 |
+
"learning_rate": 1.9999992944296416e-05,
|
| 17395 |
+
"loss": 0.9633,
|
| 17396 |
+
"step": 2898
|
| 17397 |
+
},
|
| 17398 |
+
{
|
| 17399 |
+
"epoch": 0.76,
|
| 17400 |
+
"learning_rate": 1.999999253535365e-05,
|
| 17401 |
+
"loss": 0.9406,
|
| 17402 |
+
"step": 2899
|
| 17403 |
+
},
|
| 17404 |
+
{
|
| 17405 |
+
"epoch": 0.76,
|
| 17406 |
+
"learning_rate": 1.999999211489137e-05,
|
| 17407 |
+
"loss": 0.7638,
|
| 17408 |
+
"step": 2900
|
| 17409 |
+
},
|
| 17410 |
+
{
|
| 17411 |
+
"epoch": 0.76,
|
| 17412 |
+
"learning_rate": 1.999999168290958e-05,
|
| 17413 |
+
"loss": 0.6985,
|
| 17414 |
+
"step": 2901
|
| 17415 |
+
},
|
| 17416 |
+
{
|
| 17417 |
+
"epoch": 0.76,
|
| 17418 |
+
"learning_rate": 1.9999991239408286e-05,
|
| 17419 |
+
"loss": 0.7416,
|
| 17420 |
+
"step": 2902
|
| 17421 |
+
},
|
| 17422 |
+
{
|
| 17423 |
+
"epoch": 0.76,
|
| 17424 |
+
"learning_rate": 1.9999990784387486e-05,
|
| 17425 |
+
"loss": 0.8148,
|
| 17426 |
+
"step": 2903
|
| 17427 |
+
},
|
| 17428 |
+
{
|
| 17429 |
+
"epoch": 0.76,
|
| 17430 |
+
"learning_rate": 1.999999031784718e-05,
|
| 17431 |
+
"loss": 0.9854,
|
| 17432 |
+
"step": 2904
|
| 17433 |
+
},
|
| 17434 |
+
{
|
| 17435 |
+
"epoch": 0.76,
|
| 17436 |
+
"learning_rate": 1.9999989839787365e-05,
|
| 17437 |
+
"loss": 0.772,
|
| 17438 |
+
"step": 2905
|
| 17439 |
+
},
|
| 17440 |
+
{
|
| 17441 |
+
"epoch": 0.76,
|
| 17442 |
+
"learning_rate": 1.9999989350208044e-05,
|
| 17443 |
+
"loss": 0.8585,
|
| 17444 |
+
"step": 2906
|
| 17445 |
+
},
|
| 17446 |
+
{
|
| 17447 |
+
"epoch": 0.76,
|
| 17448 |
+
"learning_rate": 1.9999988849109217e-05,
|
| 17449 |
+
"loss": 0.7457,
|
| 17450 |
+
"step": 2907
|
| 17451 |
+
},
|
| 17452 |
+
{
|
| 17453 |
+
"epoch": 0.76,
|
| 17454 |
+
"learning_rate": 1.9999988336490888e-05,
|
| 17455 |
+
"loss": 0.6919,
|
| 17456 |
+
"step": 2908
|
| 17457 |
+
},
|
| 17458 |
+
{
|
| 17459 |
+
"epoch": 0.76,
|
| 17460 |
+
"learning_rate": 1.9999987812353053e-05,
|
| 17461 |
+
"loss": 1.0138,
|
| 17462 |
+
"step": 2909
|
| 17463 |
+
},
|
| 17464 |
+
{
|
| 17465 |
+
"epoch": 0.76,
|
| 17466 |
+
"learning_rate": 1.9999987276695716e-05,
|
| 17467 |
+
"loss": 0.9937,
|
| 17468 |
+
"step": 2910
|
| 17469 |
+
},
|
| 17470 |
+
{
|
| 17471 |
+
"epoch": 0.76,
|
| 17472 |
+
"learning_rate": 1.9999986729518876e-05,
|
| 17473 |
+
"loss": 0.8485,
|
| 17474 |
+
"step": 2911
|
| 17475 |
+
},
|
| 17476 |
+
{
|
| 17477 |
+
"epoch": 0.76,
|
| 17478 |
+
"learning_rate": 1.9999986170822537e-05,
|
| 17479 |
+
"loss": 0.8009,
|
| 17480 |
+
"step": 2912
|
| 17481 |
+
},
|
| 17482 |
+
{
|
| 17483 |
+
"epoch": 0.76,
|
| 17484 |
+
"learning_rate": 1.999998560060669e-05,
|
| 17485 |
+
"loss": 0.8124,
|
| 17486 |
+
"step": 2913
|
| 17487 |
+
},
|
| 17488 |
+
{
|
| 17489 |
+
"epoch": 0.76,
|
| 17490 |
+
"learning_rate": 1.9999985018871346e-05,
|
| 17491 |
+
"loss": 0.6622,
|
| 17492 |
+
"step": 2914
|
| 17493 |
+
},
|
| 17494 |
+
{
|
| 17495 |
+
"epoch": 0.76,
|
| 17496 |
+
"learning_rate": 1.9999984425616503e-05,
|
| 17497 |
+
"loss": 0.765,
|
| 17498 |
+
"step": 2915
|
| 17499 |
+
},
|
| 17500 |
+
{
|
| 17501 |
+
"epoch": 0.76,
|
| 17502 |
+
"learning_rate": 1.999998382084216e-05,
|
| 17503 |
+
"loss": 1.0559,
|
| 17504 |
+
"step": 2916
|
| 17505 |
+
},
|
| 17506 |
+
{
|
| 17507 |
+
"epoch": 0.76,
|
| 17508 |
+
"learning_rate": 1.9999983204548314e-05,
|
| 17509 |
+
"loss": 0.8566,
|
| 17510 |
+
"step": 2917
|
| 17511 |
+
},
|
| 17512 |
+
{
|
| 17513 |
+
"epoch": 0.76,
|
| 17514 |
+
"learning_rate": 1.9999982576734975e-05,
|
| 17515 |
+
"loss": 1.0069,
|
| 17516 |
+
"step": 2918
|
| 17517 |
+
},
|
| 17518 |
+
{
|
| 17519 |
+
"epoch": 0.76,
|
| 17520 |
+
"learning_rate": 1.9999981937402136e-05,
|
| 17521 |
+
"loss": 0.9397,
|
| 17522 |
+
"step": 2919
|
| 17523 |
+
},
|
| 17524 |
+
{
|
| 17525 |
+
"epoch": 0.76,
|
| 17526 |
+
"learning_rate": 1.9999981286549802e-05,
|
| 17527 |
+
"loss": 0.7391,
|
| 17528 |
+
"step": 2920
|
| 17529 |
+
},
|
| 17530 |
+
{
|
| 17531 |
+
"epoch": 0.77,
|
| 17532 |
+
"learning_rate": 1.999998062417797e-05,
|
| 17533 |
+
"loss": 0.9099,
|
| 17534 |
+
"step": 2921
|
| 17535 |
+
},
|
| 17536 |
+
{
|
| 17537 |
+
"epoch": 0.77,
|
| 17538 |
+
"learning_rate": 1.9999979950286644e-05,
|
| 17539 |
+
"loss": 1.0357,
|
| 17540 |
+
"step": 2922
|
| 17541 |
+
},
|
| 17542 |
+
{
|
| 17543 |
+
"epoch": 0.77,
|
| 17544 |
+
"learning_rate": 1.9999979264875823e-05,
|
| 17545 |
+
"loss": 0.697,
|
| 17546 |
+
"step": 2923
|
| 17547 |
+
},
|
| 17548 |
+
{
|
| 17549 |
+
"epoch": 0.77,
|
| 17550 |
+
"learning_rate": 1.999997856794551e-05,
|
| 17551 |
+
"loss": 0.8421,
|
| 17552 |
+
"step": 2924
|
| 17553 |
+
},
|
| 17554 |
+
{
|
| 17555 |
+
"epoch": 0.77,
|
| 17556 |
+
"learning_rate": 1.9999977859495704e-05,
|
| 17557 |
+
"loss": 1.1129,
|
| 17558 |
+
"step": 2925
|
| 17559 |
+
},
|
| 17560 |
+
{
|
| 17561 |
+
"epoch": 0.77,
|
| 17562 |
+
"learning_rate": 1.9999977139526407e-05,
|
| 17563 |
+
"loss": 0.7987,
|
| 17564 |
+
"step": 2926
|
| 17565 |
+
},
|
| 17566 |
+
{
|
| 17567 |
+
"epoch": 0.77,
|
| 17568 |
+
"learning_rate": 1.9999976408037617e-05,
|
| 17569 |
+
"loss": 1.0268,
|
| 17570 |
+
"step": 2927
|
| 17571 |
+
},
|
| 17572 |
+
{
|
| 17573 |
+
"epoch": 0.77,
|
| 17574 |
+
"learning_rate": 1.9999975665029338e-05,
|
| 17575 |
+
"loss": 0.8611,
|
| 17576 |
+
"step": 2928
|
| 17577 |
+
},
|
| 17578 |
+
{
|
| 17579 |
+
"epoch": 0.77,
|
| 17580 |
+
"learning_rate": 1.9999974910501567e-05,
|
| 17581 |
+
"loss": 0.7429,
|
| 17582 |
+
"step": 2929
|
| 17583 |
+
},
|
| 17584 |
+
{
|
| 17585 |
+
"epoch": 0.77,
|
| 17586 |
+
"learning_rate": 1.999997414445431e-05,
|
| 17587 |
+
"loss": 0.7647,
|
| 17588 |
+
"step": 2930
|
| 17589 |
+
},
|
| 17590 |
+
{
|
| 17591 |
+
"epoch": 0.77,
|
| 17592 |
+
"learning_rate": 1.9999973366887565e-05,
|
| 17593 |
+
"loss": 0.8636,
|
| 17594 |
+
"step": 2931
|
| 17595 |
+
},
|
| 17596 |
+
{
|
| 17597 |
+
"epoch": 0.77,
|
| 17598 |
+
"learning_rate": 1.9999972577801335e-05,
|
| 17599 |
+
"loss": 0.7698,
|
| 17600 |
+
"step": 2932
|
| 17601 |
+
},
|
| 17602 |
+
{
|
| 17603 |
+
"epoch": 0.77,
|
| 17604 |
+
"learning_rate": 1.9999971777195615e-05,
|
| 17605 |
+
"loss": 0.7541,
|
| 17606 |
+
"step": 2933
|
| 17607 |
+
},
|
| 17608 |
+
{
|
| 17609 |
+
"epoch": 0.77,
|
| 17610 |
+
"learning_rate": 1.9999970965070413e-05,
|
| 17611 |
+
"loss": 0.9377,
|
| 17612 |
+
"step": 2934
|
| 17613 |
+
},
|
| 17614 |
+
{
|
| 17615 |
+
"epoch": 0.77,
|
| 17616 |
+
"learning_rate": 1.9999970141425726e-05,
|
| 17617 |
+
"loss": 1.0373,
|
| 17618 |
+
"step": 2935
|
| 17619 |
+
},
|
| 17620 |
+
{
|
| 17621 |
+
"epoch": 0.77,
|
| 17622 |
+
"learning_rate": 1.9999969306261557e-05,
|
| 17623 |
+
"loss": 0.9226,
|
| 17624 |
+
"step": 2936
|
| 17625 |
+
},
|
| 17626 |
+
{
|
| 17627 |
+
"epoch": 0.77,
|
| 17628 |
+
"learning_rate": 1.9999968459577906e-05,
|
| 17629 |
+
"loss": 0.5701,
|
| 17630 |
+
"step": 2937
|
| 17631 |
+
},
|
| 17632 |
+
{
|
| 17633 |
+
"epoch": 0.77,
|
| 17634 |
+
"learning_rate": 1.9999967601374773e-05,
|
| 17635 |
+
"loss": 0.736,
|
| 17636 |
+
"step": 2938
|
| 17637 |
+
},
|
| 17638 |
+
{
|
| 17639 |
+
"epoch": 0.77,
|
| 17640 |
+
"learning_rate": 1.999996673165216e-05,
|
| 17641 |
+
"loss": 0.9827,
|
| 17642 |
+
"step": 2939
|
| 17643 |
+
},
|
| 17644 |
+
{
|
| 17645 |
+
"epoch": 0.77,
|
| 17646 |
+
"learning_rate": 1.9999965850410067e-05,
|
| 17647 |
+
"loss": 0.8412,
|
| 17648 |
+
"step": 2940
|
| 17649 |
+
},
|
| 17650 |
+
{
|
| 17651 |
+
"epoch": 0.77,
|
| 17652 |
+
"learning_rate": 1.9999964957648498e-05,
|
| 17653 |
+
"loss": 0.9093,
|
| 17654 |
+
"step": 2941
|
| 17655 |
+
},
|
| 17656 |
+
{
|
| 17657 |
+
"epoch": 0.77,
|
| 17658 |
+
"learning_rate": 1.999996405336745e-05,
|
| 17659 |
+
"loss": 0.8866,
|
| 17660 |
+
"step": 2942
|
| 17661 |
+
},
|
| 17662 |
+
{
|
| 17663 |
+
"epoch": 0.77,
|
| 17664 |
+
"learning_rate": 1.9999963137566926e-05,
|
| 17665 |
+
"loss": 0.7665,
|
| 17666 |
+
"step": 2943
|
| 17667 |
+
},
|
| 17668 |
+
{
|
| 17669 |
+
"epoch": 0.77,
|
| 17670 |
+
"learning_rate": 1.9999962210246928e-05,
|
| 17671 |
+
"loss": 0.8734,
|
| 17672 |
+
"step": 2944
|
| 17673 |
+
},
|
| 17674 |
+
{
|
| 17675 |
+
"epoch": 0.77,
|
| 17676 |
+
"learning_rate": 1.9999961271407457e-05,
|
| 17677 |
+
"loss": 0.857,
|
| 17678 |
+
"step": 2945
|
| 17679 |
+
},
|
| 17680 |
+
{
|
| 17681 |
+
"epoch": 0.77,
|
| 17682 |
+
"learning_rate": 1.999996032104851e-05,
|
| 17683 |
+
"loss": 0.9939,
|
| 17684 |
+
"step": 2946
|
| 17685 |
+
},
|
| 17686 |
+
{
|
| 17687 |
+
"epoch": 0.77,
|
| 17688 |
+
"learning_rate": 1.9999959359170097e-05,
|
| 17689 |
+
"loss": 0.6063,
|
| 17690 |
+
"step": 2947
|
| 17691 |
+
},
|
| 17692 |
+
{
|
| 17693 |
+
"epoch": 0.77,
|
| 17694 |
+
"learning_rate": 1.9999958385772208e-05,
|
| 17695 |
+
"loss": 0.9707,
|
| 17696 |
+
"step": 2948
|
| 17697 |
+
},
|
| 17698 |
+
{
|
| 17699 |
+
"epoch": 0.77,
|
| 17700 |
+
"learning_rate": 1.9999957400854853e-05,
|
| 17701 |
+
"loss": 1.1027,
|
| 17702 |
+
"step": 2949
|
| 17703 |
+
},
|
| 17704 |
+
{
|
| 17705 |
+
"epoch": 0.77,
|
| 17706 |
+
"learning_rate": 1.999995640441803e-05,
|
| 17707 |
+
"loss": 0.9763,
|
| 17708 |
+
"step": 2950
|
| 17709 |
+
},
|
| 17710 |
+
{
|
| 17711 |
+
"epoch": 0.77,
|
| 17712 |
+
"learning_rate": 1.9999955396461735e-05,
|
| 17713 |
+
"loss": 0.8547,
|
| 17714 |
+
"step": 2951
|
| 17715 |
+
},
|
| 17716 |
+
{
|
| 17717 |
+
"epoch": 0.77,
|
| 17718 |
+
"learning_rate": 1.9999954376985978e-05,
|
| 17719 |
+
"loss": 0.8491,
|
| 17720 |
+
"step": 2952
|
| 17721 |
+
},
|
| 17722 |
+
{
|
| 17723 |
+
"epoch": 0.77,
|
| 17724 |
+
"learning_rate": 1.9999953345990756e-05,
|
| 17725 |
+
"loss": 0.7691,
|
| 17726 |
+
"step": 2953
|
| 17727 |
+
},
|
| 17728 |
+
{
|
| 17729 |
+
"epoch": 0.77,
|
| 17730 |
+
"learning_rate": 1.999995230347607e-05,
|
| 17731 |
+
"loss": 0.6573,
|
| 17732 |
+
"step": 2954
|
| 17733 |
+
},
|
| 17734 |
+
{
|
| 17735 |
+
"epoch": 0.77,
|
| 17736 |
+
"learning_rate": 1.999995124944192e-05,
|
| 17737 |
+
"loss": 0.606,
|
| 17738 |
+
"step": 2955
|
| 17739 |
+
},
|
| 17740 |
+
{
|
| 17741 |
+
"epoch": 0.77,
|
| 17742 |
+
"learning_rate": 1.999995018388831e-05,
|
| 17743 |
+
"loss": 0.8487,
|
| 17744 |
+
"step": 2956
|
| 17745 |
+
},
|
| 17746 |
+
{
|
| 17747 |
+
"epoch": 0.77,
|
| 17748 |
+
"learning_rate": 1.999994910681524e-05,
|
| 17749 |
+
"loss": 0.8049,
|
| 17750 |
+
"step": 2957
|
| 17751 |
+
},
|
| 17752 |
+
{
|
| 17753 |
+
"epoch": 0.77,
|
| 17754 |
+
"learning_rate": 1.999994801822271e-05,
|
| 17755 |
+
"loss": 0.8717,
|
| 17756 |
+
"step": 2958
|
| 17757 |
+
},
|
| 17758 |
+
{
|
| 17759 |
+
"epoch": 0.78,
|
| 17760 |
+
"learning_rate": 1.9999946918110725e-05,
|
| 17761 |
+
"loss": 0.7348,
|
| 17762 |
+
"step": 2959
|
| 17763 |
+
},
|
| 17764 |
+
{
|
| 17765 |
+
"epoch": 0.78,
|
| 17766 |
+
"learning_rate": 1.999994580647928e-05,
|
| 17767 |
+
"loss": 0.8263,
|
| 17768 |
+
"step": 2960
|
| 17769 |
+
},
|
| 17770 |
+
{
|
| 17771 |
+
"epoch": 0.78,
|
| 17772 |
+
"learning_rate": 1.9999944683328382e-05,
|
| 17773 |
+
"loss": 0.9414,
|
| 17774 |
+
"step": 2961
|
| 17775 |
+
},
|
| 17776 |
+
{
|
| 17777 |
+
"epoch": 0.78,
|
| 17778 |
+
"learning_rate": 1.999994354865803e-05,
|
| 17779 |
+
"loss": 0.8196,
|
| 17780 |
+
"step": 2962
|
| 17781 |
+
},
|
| 17782 |
+
{
|
| 17783 |
+
"epoch": 0.78,
|
| 17784 |
+
"learning_rate": 1.9999942402468226e-05,
|
| 17785 |
+
"loss": 0.8192,
|
| 17786 |
+
"step": 2963
|
| 17787 |
+
},
|
| 17788 |
+
{
|
| 17789 |
+
"epoch": 0.78,
|
| 17790 |
+
"learning_rate": 1.9999941244758973e-05,
|
| 17791 |
+
"loss": 1.0364,
|
| 17792 |
+
"step": 2964
|
| 17793 |
+
},
|
| 17794 |
+
{
|
| 17795 |
+
"epoch": 0.78,
|
| 17796 |
+
"learning_rate": 1.9999940075530268e-05,
|
| 17797 |
+
"loss": 0.8511,
|
| 17798 |
+
"step": 2965
|
| 17799 |
+
},
|
| 17800 |
+
{
|
| 17801 |
+
"epoch": 0.78,
|
| 17802 |
+
"learning_rate": 1.9999938894782118e-05,
|
| 17803 |
+
"loss": 0.9189,
|
| 17804 |
+
"step": 2966
|
| 17805 |
+
},
|
| 17806 |
+
{
|
| 17807 |
+
"epoch": 0.78,
|
| 17808 |
+
"learning_rate": 1.9999937702514516e-05,
|
| 17809 |
+
"loss": 0.7836,
|
| 17810 |
+
"step": 2967
|
| 17811 |
+
},
|
| 17812 |
+
{
|
| 17813 |
+
"epoch": 0.78,
|
| 17814 |
+
"learning_rate": 1.9999936498727466e-05,
|
| 17815 |
+
"loss": 0.9566,
|
| 17816 |
+
"step": 2968
|
| 17817 |
+
},
|
| 17818 |
+
{
|
| 17819 |
+
"epoch": 0.78,
|
| 17820 |
+
"learning_rate": 1.999993528342098e-05,
|
| 17821 |
+
"loss": 0.8312,
|
| 17822 |
+
"step": 2969
|
| 17823 |
+
},
|
| 17824 |
+
{
|
| 17825 |
+
"epoch": 0.78,
|
| 17826 |
+
"learning_rate": 1.9999934056595046e-05,
|
| 17827 |
+
"loss": 1.0597,
|
| 17828 |
+
"step": 2970
|
| 17829 |
+
},
|
| 17830 |
+
{
|
| 17831 |
+
"epoch": 0.78,
|
| 17832 |
+
"learning_rate": 1.9999932818249672e-05,
|
| 17833 |
+
"loss": 0.9733,
|
| 17834 |
+
"step": 2971
|
| 17835 |
+
},
|
| 17836 |
+
{
|
| 17837 |
+
"epoch": 0.78,
|
| 17838 |
+
"learning_rate": 1.9999931568384857e-05,
|
| 17839 |
+
"loss": 0.9396,
|
| 17840 |
+
"step": 2972
|
| 17841 |
+
},
|
| 17842 |
+
{
|
| 17843 |
+
"epoch": 0.78,
|
| 17844 |
+
"learning_rate": 1.9999930307000604e-05,
|
| 17845 |
+
"loss": 0.7734,
|
| 17846 |
+
"step": 2973
|
| 17847 |
+
},
|
| 17848 |
+
{
|
| 17849 |
+
"epoch": 0.78,
|
| 17850 |
+
"learning_rate": 1.9999929034096916e-05,
|
| 17851 |
+
"loss": 0.748,
|
| 17852 |
+
"step": 2974
|
| 17853 |
+
},
|
| 17854 |
+
{
|
| 17855 |
+
"epoch": 0.78,
|
| 17856 |
+
"learning_rate": 1.999992774967379e-05,
|
| 17857 |
+
"loss": 0.7932,
|
| 17858 |
+
"step": 2975
|
| 17859 |
+
},
|
| 17860 |
+
{
|
| 17861 |
+
"epoch": 0.78,
|
| 17862 |
+
"learning_rate": 1.999992645373123e-05,
|
| 17863 |
+
"loss": 0.7041,
|
| 17864 |
+
"step": 2976
|
| 17865 |
+
},
|
| 17866 |
+
{
|
| 17867 |
+
"epoch": 0.78,
|
| 17868 |
+
"learning_rate": 1.9999925146269237e-05,
|
| 17869 |
+
"loss": 0.8427,
|
| 17870 |
+
"step": 2977
|
| 17871 |
+
},
|
| 17872 |
+
{
|
| 17873 |
+
"epoch": 0.78,
|
| 17874 |
+
"learning_rate": 1.9999923827287814e-05,
|
| 17875 |
+
"loss": 0.815,
|
| 17876 |
+
"step": 2978
|
| 17877 |
+
},
|
| 17878 |
+
{
|
| 17879 |
+
"epoch": 0.78,
|
| 17880 |
+
"learning_rate": 1.999992249678696e-05,
|
| 17881 |
+
"loss": 0.7921,
|
| 17882 |
+
"step": 2979
|
| 17883 |
+
},
|
| 17884 |
+
{
|
| 17885 |
+
"epoch": 0.78,
|
| 17886 |
+
"learning_rate": 1.9999921154766677e-05,
|
| 17887 |
+
"loss": 0.9674,
|
| 17888 |
+
"step": 2980
|
| 17889 |
+
},
|
| 17890 |
+
{
|
| 17891 |
+
"epoch": 0.78,
|
| 17892 |
+
"learning_rate": 1.999991980122697e-05,
|
| 17893 |
+
"loss": 0.9314,
|
| 17894 |
+
"step": 2981
|
| 17895 |
+
},
|
| 17896 |
+
{
|
| 17897 |
+
"epoch": 0.78,
|
| 17898 |
+
"learning_rate": 1.9999918436167836e-05,
|
| 17899 |
+
"loss": 0.8754,
|
| 17900 |
+
"step": 2982
|
| 17901 |
+
},
|
| 17902 |
+
{
|
| 17903 |
+
"epoch": 0.78,
|
| 17904 |
+
"learning_rate": 1.999991705958928e-05,
|
| 17905 |
+
"loss": 0.8286,
|
| 17906 |
+
"step": 2983
|
| 17907 |
+
},
|
| 17908 |
+
{
|
| 17909 |
+
"epoch": 0.78,
|
| 17910 |
+
"learning_rate": 1.99999156714913e-05,
|
| 17911 |
+
"loss": 0.9609,
|
| 17912 |
+
"step": 2984
|
| 17913 |
+
},
|
| 17914 |
+
{
|
| 17915 |
+
"epoch": 0.78,
|
| 17916 |
+
"learning_rate": 1.9999914271873903e-05,
|
| 17917 |
+
"loss": 0.9706,
|
| 17918 |
+
"step": 2985
|
| 17919 |
+
},
|
| 17920 |
+
{
|
| 17921 |
+
"epoch": 0.78,
|
| 17922 |
+
"learning_rate": 1.9999912860737084e-05,
|
| 17923 |
+
"loss": 0.9377,
|
| 17924 |
+
"step": 2986
|
| 17925 |
+
},
|
| 17926 |
+
{
|
| 17927 |
+
"epoch": 0.78,
|
| 17928 |
+
"learning_rate": 1.999991143808085e-05,
|
| 17929 |
+
"loss": 0.7978,
|
| 17930 |
+
"step": 2987
|
| 17931 |
+
},
|
| 17932 |
+
{
|
| 17933 |
+
"epoch": 0.78,
|
| 17934 |
+
"learning_rate": 1.99999100039052e-05,
|
| 17935 |
+
"loss": 0.9898,
|
| 17936 |
+
"step": 2988
|
| 17937 |
+
},
|
| 17938 |
+
{
|
| 17939 |
+
"epoch": 0.78,
|
| 17940 |
+
"learning_rate": 1.999990855821014e-05,
|
| 17941 |
+
"loss": 0.7036,
|
| 17942 |
+
"step": 2989
|
| 17943 |
+
},
|
| 17944 |
+
{
|
| 17945 |
+
"epoch": 0.78,
|
| 17946 |
+
"learning_rate": 1.9999907100995662e-05,
|
| 17947 |
+
"loss": 0.9527,
|
| 17948 |
+
"step": 2990
|
| 17949 |
+
},
|
| 17950 |
+
{
|
| 17951 |
+
"epoch": 0.78,
|
| 17952 |
+
"learning_rate": 1.9999905632261775e-05,
|
| 17953 |
+
"loss": 0.9587,
|
| 17954 |
+
"step": 2991
|
| 17955 |
+
},
|
| 17956 |
+
{
|
| 17957 |
+
"epoch": 0.78,
|
| 17958 |
+
"learning_rate": 1.999990415200848e-05,
|
| 17959 |
+
"loss": 0.811,
|
| 17960 |
+
"step": 2992
|
| 17961 |
+
},
|
| 17962 |
+
{
|
| 17963 |
+
"epoch": 0.78,
|
| 17964 |
+
"learning_rate": 1.999990266023578e-05,
|
| 17965 |
+
"loss": 1.0274,
|
| 17966 |
+
"step": 2993
|
| 17967 |
+
},
|
| 17968 |
+
{
|
| 17969 |
+
"epoch": 0.78,
|
| 17970 |
+
"learning_rate": 1.999990115694367e-05,
|
| 17971 |
+
"loss": 0.9053,
|
| 17972 |
+
"step": 2994
|
| 17973 |
+
},
|
| 17974 |
+
{
|
| 17975 |
+
"epoch": 0.78,
|
| 17976 |
+
"learning_rate": 1.9999899642132162e-05,
|
| 17977 |
+
"loss": 0.6738,
|
| 17978 |
+
"step": 2995
|
| 17979 |
+
},
|
| 17980 |
+
{
|
| 17981 |
+
"epoch": 0.78,
|
| 17982 |
+
"learning_rate": 1.9999898115801248e-05,
|
| 17983 |
+
"loss": 0.894,
|
| 17984 |
+
"step": 2996
|
| 17985 |
+
},
|
| 17986 |
+
{
|
| 17987 |
+
"epoch": 0.79,
|
| 17988 |
+
"learning_rate": 1.9999896577950936e-05,
|
| 17989 |
+
"loss": 0.8427,
|
| 17990 |
+
"step": 2997
|
| 17991 |
+
},
|
| 17992 |
+
{
|
| 17993 |
+
"epoch": 0.79,
|
| 17994 |
+
"learning_rate": 1.9999895028581227e-05,
|
| 17995 |
+
"loss": 0.848,
|
| 17996 |
+
"step": 2998
|
| 17997 |
+
},
|
| 17998 |
+
{
|
| 17999 |
+
"epoch": 0.79,
|
| 18000 |
+
"learning_rate": 1.999989346769212e-05,
|
| 18001 |
+
"loss": 0.7822,
|
| 18002 |
+
"step": 2999
|
| 18003 |
+
},
|
| 18004 |
+
{
|
| 18005 |
+
"epoch": 0.79,
|
| 18006 |
+
"learning_rate": 1.999989189528362e-05,
|
| 18007 |
+
"loss": 0.6597,
|
| 18008 |
+
"step": 3000
|
| 18009 |
}
|
| 18010 |
],
|
| 18011 |
"max_steps": 95425,
|
| 18012 |
"num_train_epochs": 25,
|
| 18013 |
+
"total_flos": 2.29528665980928e+16,
|
| 18014 |
"trial_name": null,
|
| 18015 |
"trial_params": null
|
| 18016 |
}
|