Upload folder using huggingface_hub
- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state_0.pth +1 -1
- rng_state_1.pth +1 -1
- rng_state_2.pth +1 -1
- rng_state_3.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +2769 -3
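The commit title above is the default message written by the huggingface_hub Python client when a checkpoint folder is pushed in a single call. A minimal sketch of how a commit like this is typically produced is shown below; the local path and repository id are placeholders, not values taken from this commit.

from huggingface_hub import HfApi

api = HfApi()  # assumes a token is already configured via `huggingface-cli login` or HF_TOKEN
api.upload_folder(
    folder_path="./checkpoint-1970",    # placeholder: local folder containing the files listed above
    repo_id="username/model-name",      # placeholder repository id
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)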
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
| 1 | version https://git-lfs.github.com/spec/v1
| 2 | - oid sha256:
| 3 | size 4965799096

| 1 | version https://git-lfs.github.com/spec/v1
| 2 | + oid sha256:b2e3f36e54ba8ad67dff82546b6c26ef4ce154c3e0cf26118eb240e0f7261bbb
| 3 | size 4965799096
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
| 1 | version https://git-lfs.github.com/spec/v1
| 2 | - oid sha256:
| 3 | size 1459729952

| 1 | version https://git-lfs.github.com/spec/v1
| 2 | + oid sha256:4ca83797f0bb9e75400adeb2e7b7de1803782e09f9c5a3b8de9a9b6bf0dcb15a
| 3 | size 1459729952
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
| 1 | version https://git-lfs.github.com/spec/v1
| 2 | - oid sha256:
| 3 | size 6527220350

| 1 | version https://git-lfs.github.com/spec/v1
| 2 | + oid sha256:223ab05e7281bb453e1ca7b5c9a9f35a4ef9ca55e63c6f84a5c0c045878a319b
| 3 | size 6527220350
rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
| 1 | version https://git-lfs.github.com/spec/v1
| 2 | - oid sha256:
| 3 | size 15024

| 1 | version https://git-lfs.github.com/spec/v1
| 2 | + oid sha256:5c5e18f922d0af74d820247ae97bee506ab412554a58345ddf2558abc94ee3e3
| 3 | size 15024
rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
| 1 | version https://git-lfs.github.com/spec/v1
| 2 | - oid sha256:
| 3 | size 15024

| 1 | version https://git-lfs.github.com/spec/v1
| 2 | + oid sha256:2a2dcca6d9741f46592359768ea2212b9321da6408d1fd7d3a80b017bf37f434
| 3 | size 15024
rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
| 1 | version https://git-lfs.github.com/spec/v1
| 2 | - oid sha256:
| 3 | size 15024

| 1 | version https://git-lfs.github.com/spec/v1
| 2 | + oid sha256:69420ece2c255923c5cbb3c6c9c4a6b9cb38fb57e5d3033c8b7d436a1faf6f13
| 3 | size 15024
rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
| 1 | version https://git-lfs.github.com/spec/v1
| 2 | - oid sha256:
| 3 | size 15024

| 1 | version https://git-lfs.github.com/spec/v1
| 2 | + oid sha256:66f278b40a1e23b88a657c4e5d03afa8dbbbe14dfeb16f6b4beedaece6cdd0b9
| 3 | size 15024
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
| 1 | version https://git-lfs.github.com/spec/v1
| 2 | - oid sha256:
| 3 | size 1064

| 1 | version https://git-lfs.github.com/spec/v1
| 2 | + oid sha256:7c21fd11c3fb49d4f1045ef1ecdb7c043636d2e0c5d7347765d8a80540a590af
| 3 | size 1064
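Every file above is tracked with Git LFS, so each diff touches only the three-line pointer (spec version, sha256 oid, byte size) rather than the binary payload itself. Below is a small sketch, assuming the shard has been downloaded locally under its repo filename, for checking it against the new oid recorded in this commit.

import hashlib

def file_sha256(path, chunk_size=1 << 20):
    # Stream in 1 MiB chunks so multi-GB shards never need to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# oid from the model-00002-of-00002.safetensors pointer above
expected = "4ca83797f0bb9e75400adeb2e7b7de1803782e09f9c5a3b8de9a9b6bf0dcb15a"
assert file_sha256("model-00002-of-00002.safetensors") == expected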
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
| 1 | {
| 2 | "best_metric": null,
| 3 | "best_model_checkpoint": null,
| 4 | - "epoch":
| 5 | "eval_steps": 500,
| 6 | - "global_step":
| 7 | "is_hyper_param_search": false,
| 8 | "is_local_process_zero": true,
| 9 | "is_world_process_zero": true,
@@ -11047,6 +11047,2772 @@
| 11047 | "learning_rate": 3.673088019729587e-06,
| 11048 | "loss": 1.1386,
| 11049 | "step": 1576
| 11050 | }
| 11051 | ],
| 11052 | "logging_steps": 1,
@@ -11066,7 +13832,7 @@
| 11066 | "attributes": {}
| 11067 | }
| 11068 | },
| 11069 | - "total_flos":
| 11070 | "train_batch_size": 2,
| 11071 | "trial_name": null,
| 11072 | "trial_params": null

| 1 | {
| 2 | "best_metric": null,
| 3 | "best_model_checkpoint": null,
| 4 | + "epoch": 2.497777777777778,
| 5 | "eval_steps": 500,
| 6 | + "global_step": 1970,
| 7 | "is_hyper_param_search": false,
| 8 | "is_local_process_zero": true,
| 9 | "is_world_process_zero": true,
| 11047 | "learning_rate": 3.673088019729587e-06,
| 11048 | "loss": 1.1386,
| 11049 | "step": 1576
| 11050 |
+
},
|
| 11051 |
+
{
|
| 11052 |
+
"epoch": 2.0,
|
| 11053 |
+
"grad_norm": 1.4375,
|
| 11054 |
+
"learning_rate": 3.669484870592032e-06,
|
| 11055 |
+
"loss": 0.5283,
|
| 11056 |
+
"step": 1577
|
| 11057 |
+
},
|
| 11058 |
+
{
|
| 11059 |
+
"epoch": 2.0006349206349205,
|
| 11060 |
+
"grad_norm": 0.703125,
|
| 11061 |
+
"learning_rate": 3.665881541401329e-06,
|
| 11062 |
+
"loss": 0.5619,
|
| 11063 |
+
"step": 1578
|
| 11064 |
+
},
|
| 11065 |
+
{
|
| 11066 |
+
"epoch": 2.0006349206349205,
|
| 11067 |
+
"eval_loss": 1.1575638055801392,
|
| 11068 |
+
"eval_runtime": 89.0193,
|
| 11069 |
+
"eval_samples_per_second": 48.697,
|
| 11070 |
+
"eval_steps_per_second": 6.089,
|
| 11071 |
+
"step": 1578
|
| 11072 |
+
},
|
| 11073 |
+
{
|
| 11074 |
+
"epoch": 2.001269841269841,
|
| 11075 |
+
"grad_norm": 0.92578125,
|
| 11076 |
+
"learning_rate": 3.6622780359854946e-06,
|
| 11077 |
+
"loss": 1.1188,
|
| 11078 |
+
"step": 1579
|
| 11079 |
+
},
|
| 11080 |
+
{
|
| 11081 |
+
"epoch": 2.0025396825396826,
|
| 11082 |
+
"grad_norm": 1.0234375,
|
| 11083 |
+
"learning_rate": 3.6586743581727326e-06,
|
| 11084 |
+
"loss": 1.1121,
|
| 11085 |
+
"step": 1580
|
| 11086 |
+
},
|
| 11087 |
+
{
|
| 11088 |
+
"epoch": 2.0038095238095237,
|
| 11089 |
+
"grad_norm": 0.91015625,
|
| 11090 |
+
"learning_rate": 3.6550705117914276e-06,
|
| 11091 |
+
"loss": 1.0873,
|
| 11092 |
+
"step": 1581
|
| 11093 |
+
},
|
| 11094 |
+
{
|
| 11095 |
+
"epoch": 2.0050793650793652,
|
| 11096 |
+
"grad_norm": 0.9921875,
|
| 11097 |
+
"learning_rate": 3.6514665006701447e-06,
|
| 11098 |
+
"loss": 1.1009,
|
| 11099 |
+
"step": 1582
|
| 11100 |
+
},
|
| 11101 |
+
{
|
| 11102 |
+
"epoch": 2.0063492063492063,
|
| 11103 |
+
"grad_norm": 0.96875,
|
| 11104 |
+
"learning_rate": 3.6478623286376248e-06,
|
| 11105 |
+
"loss": 1.0493,
|
| 11106 |
+
"step": 1583
|
| 11107 |
+
},
|
| 11108 |
+
{
|
| 11109 |
+
"epoch": 2.0076190476190474,
|
| 11110 |
+
"grad_norm": 0.9140625,
|
| 11111 |
+
"learning_rate": 3.6442579995227805e-06,
|
| 11112 |
+
"loss": 1.1485,
|
| 11113 |
+
"step": 1584
|
| 11114 |
+
},
|
| 11115 |
+
{
|
| 11116 |
+
"epoch": 2.008888888888889,
|
| 11117 |
+
"grad_norm": 0.953125,
|
| 11118 |
+
"learning_rate": 3.6406535171546864e-06,
|
| 11119 |
+
"loss": 1.1112,
|
| 11120 |
+
"step": 1585
|
| 11121 |
+
},
|
| 11122 |
+
{
|
| 11123 |
+
"epoch": 2.01015873015873,
|
| 11124 |
+
"grad_norm": 0.8828125,
|
| 11125 |
+
"learning_rate": 3.637048885362587e-06,
|
| 11126 |
+
"loss": 1.1392,
|
| 11127 |
+
"step": 1586
|
| 11128 |
+
},
|
| 11129 |
+
{
|
| 11130 |
+
"epoch": 2.0114285714285716,
|
| 11131 |
+
"grad_norm": 1.0625,
|
| 11132 |
+
"learning_rate": 3.6334441079758788e-06,
|
| 11133 |
+
"loss": 1.1524,
|
| 11134 |
+
"step": 1587
|
| 11135 |
+
},
|
| 11136 |
+
{
|
| 11137 |
+
"epoch": 2.0126984126984127,
|
| 11138 |
+
"grad_norm": 0.98046875,
|
| 11139 |
+
"learning_rate": 3.629839188824118e-06,
|
| 11140 |
+
"loss": 1.0846,
|
| 11141 |
+
"step": 1588
|
| 11142 |
+
},
|
| 11143 |
+
{
|
| 11144 |
+
"epoch": 2.013968253968254,
|
| 11145 |
+
"grad_norm": 0.9375,
|
| 11146 |
+
"learning_rate": 3.62623413173701e-06,
|
| 11147 |
+
"loss": 1.0632,
|
| 11148 |
+
"step": 1589
|
| 11149 |
+
},
|
| 11150 |
+
{
|
| 11151 |
+
"epoch": 2.0152380952380953,
|
| 11152 |
+
"grad_norm": 0.99609375,
|
| 11153 |
+
"learning_rate": 3.6226289405444057e-06,
|
| 11154 |
+
"loss": 1.0533,
|
| 11155 |
+
"step": 1590
|
| 11156 |
+
},
|
| 11157 |
+
{
|
| 11158 |
+
"epoch": 2.0165079365079364,
|
| 11159 |
+
"grad_norm": 0.97265625,
|
| 11160 |
+
"learning_rate": 3.619023619076297e-06,
|
| 11161 |
+
"loss": 1.1303,
|
| 11162 |
+
"step": 1591
|
| 11163 |
+
},
|
| 11164 |
+
{
|
| 11165 |
+
"epoch": 2.017777777777778,
|
| 11166 |
+
"grad_norm": 1.0,
|
| 11167 |
+
"learning_rate": 3.6154181711628185e-06,
|
| 11168 |
+
"loss": 1.1772,
|
| 11169 |
+
"step": 1592
|
| 11170 |
+
},
|
| 11171 |
+
{
|
| 11172 |
+
"epoch": 2.019047619047619,
|
| 11173 |
+
"grad_norm": 1.0,
|
| 11174 |
+
"learning_rate": 3.611812600634237e-06,
|
| 11175 |
+
"loss": 1.0997,
|
| 11176 |
+
"step": 1593
|
| 11177 |
+
},
|
| 11178 |
+
{
|
| 11179 |
+
"epoch": 2.0203174603174605,
|
| 11180 |
+
"grad_norm": 0.9609375,
|
| 11181 |
+
"learning_rate": 3.6082069113209474e-06,
|
| 11182 |
+
"loss": 1.1378,
|
| 11183 |
+
"step": 1594
|
| 11184 |
+
},
|
| 11185 |
+
{
|
| 11186 |
+
"epoch": 2.0215873015873016,
|
| 11187 |
+
"grad_norm": 1.015625,
|
| 11188 |
+
"learning_rate": 3.6046011070534754e-06,
|
| 11189 |
+
"loss": 1.0801,
|
| 11190 |
+
"step": 1595
|
| 11191 |
+
},
|
| 11192 |
+
{
|
| 11193 |
+
"epoch": 2.0228571428571427,
|
| 11194 |
+
"grad_norm": 0.94140625,
|
| 11195 |
+
"learning_rate": 3.6009951916624636e-06,
|
| 11196 |
+
"loss": 1.0544,
|
| 11197 |
+
"step": 1596
|
| 11198 |
+
},
|
| 11199 |
+
{
|
| 11200 |
+
"epoch": 2.024126984126984,
|
| 11201 |
+
"grad_norm": 1.0546875,
|
| 11202 |
+
"learning_rate": 3.5973891689786764e-06,
|
| 11203 |
+
"loss": 1.1424,
|
| 11204 |
+
"step": 1597
|
| 11205 |
+
},
|
| 11206 |
+
{
|
| 11207 |
+
"epoch": 2.0253968253968253,
|
| 11208 |
+
"grad_norm": 1.03125,
|
| 11209 |
+
"learning_rate": 3.593783042832992e-06,
|
| 11210 |
+
"loss": 1.1791,
|
| 11211 |
+
"step": 1598
|
| 11212 |
+
},
|
| 11213 |
+
{
|
| 11214 |
+
"epoch": 2.026666666666667,
|
| 11215 |
+
"grad_norm": 0.9921875,
|
| 11216 |
+
"learning_rate": 3.5901768170563963e-06,
|
| 11217 |
+
"loss": 1.1071,
|
| 11218 |
+
"step": 1599
|
| 11219 |
+
},
|
| 11220 |
+
{
|
| 11221 |
+
"epoch": 2.027936507936508,
|
| 11222 |
+
"grad_norm": 0.95703125,
|
| 11223 |
+
"learning_rate": 3.5865704954799826e-06,
|
| 11224 |
+
"loss": 1.1094,
|
| 11225 |
+
"step": 1600
|
| 11226 |
+
},
|
| 11227 |
+
{
|
| 11228 |
+
"epoch": 2.029206349206349,
|
| 11229 |
+
"grad_norm": 0.92578125,
|
| 11230 |
+
"learning_rate": 3.582964081934946e-06,
|
| 11231 |
+
"loss": 1.0959,
|
| 11232 |
+
"step": 1601
|
| 11233 |
+
},
|
| 11234 |
+
{
|
| 11235 |
+
"epoch": 2.0304761904761905,
|
| 11236 |
+
"grad_norm": 0.921875,
|
| 11237 |
+
"learning_rate": 3.5793575802525783e-06,
|
| 11238 |
+
"loss": 1.1588,
|
| 11239 |
+
"step": 1602
|
| 11240 |
+
},
|
| 11241 |
+
{
|
| 11242 |
+
"epoch": 2.0317460317460316,
|
| 11243 |
+
"grad_norm": 0.94140625,
|
| 11244 |
+
"learning_rate": 3.5757509942642663e-06,
|
| 11245 |
+
"loss": 1.1089,
|
| 11246 |
+
"step": 1603
|
| 11247 |
+
},
|
| 11248 |
+
{
|
| 11249 |
+
"epoch": 2.033015873015873,
|
| 11250 |
+
"grad_norm": 0.96484375,
|
| 11251 |
+
"learning_rate": 3.5721443278014855e-06,
|
| 11252 |
+
"loss": 1.1385,
|
| 11253 |
+
"step": 1604
|
| 11254 |
+
},
|
| 11255 |
+
{
|
| 11256 |
+
"epoch": 2.0342857142857143,
|
| 11257 |
+
"grad_norm": 0.9453125,
|
| 11258 |
+
"learning_rate": 3.5685375846957967e-06,
|
| 11259 |
+
"loss": 1.1326,
|
| 11260 |
+
"step": 1605
|
| 11261 |
+
},
|
| 11262 |
+
{
|
| 11263 |
+
"epoch": 2.0355555555555553,
|
| 11264 |
+
"grad_norm": 0.98046875,
|
| 11265 |
+
"learning_rate": 3.564930768778842e-06,
|
| 11266 |
+
"loss": 1.156,
|
| 11267 |
+
"step": 1606
|
| 11268 |
+
},
|
| 11269 |
+
{
|
| 11270 |
+
"epoch": 2.036825396825397,
|
| 11271 |
+
"grad_norm": 0.8984375,
|
| 11272 |
+
"learning_rate": 3.561323883882344e-06,
|
| 11273 |
+
"loss": 1.0762,
|
| 11274 |
+
"step": 1607
|
| 11275 |
+
},
|
| 11276 |
+
{
|
| 11277 |
+
"epoch": 2.038095238095238,
|
| 11278 |
+
"grad_norm": 0.9765625,
|
| 11279 |
+
"learning_rate": 3.557716933838093e-06,
|
| 11280 |
+
"loss": 1.1483,
|
| 11281 |
+
"step": 1608
|
| 11282 |
+
},
|
| 11283 |
+
{
|
| 11284 |
+
"epoch": 2.0393650793650795,
|
| 11285 |
+
"grad_norm": 1.0546875,
|
| 11286 |
+
"learning_rate": 3.5541099224779517e-06,
|
| 11287 |
+
"loss": 1.1206,
|
| 11288 |
+
"step": 1609
|
| 11289 |
+
},
|
| 11290 |
+
{
|
| 11291 |
+
"epoch": 2.0406349206349206,
|
| 11292 |
+
"grad_norm": 0.94140625,
|
| 11293 |
+
"learning_rate": 3.55050285363385e-06,
|
| 11294 |
+
"loss": 1.0709,
|
| 11295 |
+
"step": 1610
|
| 11296 |
+
},
|
| 11297 |
+
{
|
| 11298 |
+
"epoch": 2.041904761904762,
|
| 11299 |
+
"grad_norm": 0.9921875,
|
| 11300 |
+
"learning_rate": 3.5468957311377735e-06,
|
| 11301 |
+
"loss": 1.0941,
|
| 11302 |
+
"step": 1611
|
| 11303 |
+
},
|
| 11304 |
+
{
|
| 11305 |
+
"epoch": 2.043174603174603,
|
| 11306 |
+
"grad_norm": 0.8984375,
|
| 11307 |
+
"learning_rate": 3.54328855882177e-06,
|
| 11308 |
+
"loss": 1.051,
|
| 11309 |
+
"step": 1612
|
| 11310 |
+
},
|
| 11311 |
+
{
|
| 11312 |
+
"epoch": 2.0444444444444443,
|
| 11313 |
+
"grad_norm": 0.875,
|
| 11314 |
+
"learning_rate": 3.5396813405179376e-06,
|
| 11315 |
+
"loss": 1.0019,
|
| 11316 |
+
"step": 1613
|
| 11317 |
+
},
|
| 11318 |
+
{
|
| 11319 |
+
"epoch": 2.045714285714286,
|
| 11320 |
+
"grad_norm": 0.98046875,
|
| 11321 |
+
"learning_rate": 3.536074080058423e-06,
|
| 11322 |
+
"loss": 1.1298,
|
| 11323 |
+
"step": 1614
|
| 11324 |
+
},
|
| 11325 |
+
{
|
| 11326 |
+
"epoch": 2.046984126984127,
|
| 11327 |
+
"grad_norm": 0.9375,
|
| 11328 |
+
"learning_rate": 3.5324667812754193e-06,
|
| 11329 |
+
"loss": 1.1687,
|
| 11330 |
+
"step": 1615
|
| 11331 |
+
},
|
| 11332 |
+
{
|
| 11333 |
+
"epoch": 2.0482539682539684,
|
| 11334 |
+
"grad_norm": 0.99609375,
|
| 11335 |
+
"learning_rate": 3.5288594480011592e-06,
|
| 11336 |
+
"loss": 1.0941,
|
| 11337 |
+
"step": 1616
|
| 11338 |
+
},
|
| 11339 |
+
{
|
| 11340 |
+
"epoch": 2.0495238095238095,
|
| 11341 |
+
"grad_norm": 0.953125,
|
| 11342 |
+
"learning_rate": 3.525252084067912e-06,
|
| 11343 |
+
"loss": 1.1288,
|
| 11344 |
+
"step": 1617
|
| 11345 |
+
},
|
| 11346 |
+
{
|
| 11347 |
+
"epoch": 2.0507936507936506,
|
| 11348 |
+
"grad_norm": 1.1015625,
|
| 11349 |
+
"learning_rate": 3.5216446933079787e-06,
|
| 11350 |
+
"loss": 1.0944,
|
| 11351 |
+
"step": 1618
|
| 11352 |
+
},
|
| 11353 |
+
{
|
| 11354 |
+
"epoch": 2.052063492063492,
|
| 11355 |
+
"grad_norm": 0.953125,
|
| 11356 |
+
"learning_rate": 3.518037279553693e-06,
|
| 11357 |
+
"loss": 1.1013,
|
| 11358 |
+
"step": 1619
|
| 11359 |
+
},
|
| 11360 |
+
{
|
| 11361 |
+
"epoch": 2.0533333333333332,
|
| 11362 |
+
"grad_norm": 0.9453125,
|
| 11363 |
+
"learning_rate": 3.5144298466374074e-06,
|
| 11364 |
+
"loss": 1.1078,
|
| 11365 |
+
"step": 1620
|
| 11366 |
+
},
|
| 11367 |
+
{
|
| 11368 |
+
"epoch": 2.0546031746031748,
|
| 11369 |
+
"grad_norm": 0.98046875,
|
| 11370 |
+
"learning_rate": 3.5108223983914974e-06,
|
| 11371 |
+
"loss": 1.1235,
|
| 11372 |
+
"step": 1621
|
| 11373 |
+
},
|
| 11374 |
+
{
|
| 11375 |
+
"epoch": 2.055873015873016,
|
| 11376 |
+
"grad_norm": 0.9765625,
|
| 11377 |
+
"learning_rate": 3.5072149386483557e-06,
|
| 11378 |
+
"loss": 1.1266,
|
| 11379 |
+
"step": 1622
|
| 11380 |
+
},
|
| 11381 |
+
{
|
| 11382 |
+
"epoch": 2.057142857142857,
|
| 11383 |
+
"grad_norm": 0.98828125,
|
| 11384 |
+
"learning_rate": 3.5036074712403857e-06,
|
| 11385 |
+
"loss": 1.1639,
|
| 11386 |
+
"step": 1623
|
| 11387 |
+
},
|
| 11388 |
+
{
|
| 11389 |
+
"epoch": 2.0584126984126985,
|
| 11390 |
+
"grad_norm": 0.9453125,
|
| 11391 |
+
"learning_rate": 3.5e-06,
|
| 11392 |
+
"loss": 1.1113,
|
| 11393 |
+
"step": 1624
|
| 11394 |
+
},
|
| 11395 |
+
{
|
| 11396 |
+
"epoch": 2.0596825396825396,
|
| 11397 |
+
"grad_norm": 1.03125,
|
| 11398 |
+
"learning_rate": 3.4963925287596137e-06,
|
| 11399 |
+
"loss": 1.1378,
|
| 11400 |
+
"step": 1625
|
| 11401 |
+
},
|
| 11402 |
+
{
|
| 11403 |
+
"epoch": 2.060952380952381,
|
| 11404 |
+
"grad_norm": 1.015625,
|
| 11405 |
+
"learning_rate": 3.492785061351644e-06,
|
| 11406 |
+
"loss": 1.0979,
|
| 11407 |
+
"step": 1626
|
| 11408 |
+
},
|
| 11409 |
+
{
|
| 11410 |
+
"epoch": 2.062222222222222,
|
| 11411 |
+
"grad_norm": 1.0078125,
|
| 11412 |
+
"learning_rate": 3.4891776016085025e-06,
|
| 11413 |
+
"loss": 1.066,
|
| 11414 |
+
"step": 1627
|
| 11415 |
+
},
|
| 11416 |
+
{
|
| 11417 |
+
"epoch": 2.0634920634920633,
|
| 11418 |
+
"grad_norm": 1.015625,
|
| 11419 |
+
"learning_rate": 3.4855701533625925e-06,
|
| 11420 |
+
"loss": 1.1784,
|
| 11421 |
+
"step": 1628
|
| 11422 |
+
},
|
| 11423 |
+
{
|
| 11424 |
+
"epoch": 2.064761904761905,
|
| 11425 |
+
"grad_norm": 0.98828125,
|
| 11426 |
+
"learning_rate": 3.4819627204463077e-06,
|
| 11427 |
+
"loss": 1.0794,
|
| 11428 |
+
"step": 1629
|
| 11429 |
+
},
|
| 11430 |
+
{
|
| 11431 |
+
"epoch": 2.066031746031746,
|
| 11432 |
+
"grad_norm": 0.953125,
|
| 11433 |
+
"learning_rate": 3.478355306692021e-06,
|
| 11434 |
+
"loss": 1.0571,
|
| 11435 |
+
"step": 1630
|
| 11436 |
+
},
|
| 11437 |
+
{
|
| 11438 |
+
"epoch": 2.0673015873015874,
|
| 11439 |
+
"grad_norm": 0.9453125,
|
| 11440 |
+
"learning_rate": 3.4747479159320884e-06,
|
| 11441 |
+
"loss": 1.0963,
|
| 11442 |
+
"step": 1631
|
| 11443 |
+
},
|
| 11444 |
+
{
|
| 11445 |
+
"epoch": 2.0685714285714285,
|
| 11446 |
+
"grad_norm": 0.96875,
|
| 11447 |
+
"learning_rate": 3.4711405519988415e-06,
|
| 11448 |
+
"loss": 1.1355,
|
| 11449 |
+
"step": 1632
|
| 11450 |
+
},
|
| 11451 |
+
{
|
| 11452 |
+
"epoch": 2.06984126984127,
|
| 11453 |
+
"grad_norm": 0.93359375,
|
| 11454 |
+
"learning_rate": 3.467533218724581e-06,
|
| 11455 |
+
"loss": 1.193,
|
| 11456 |
+
"step": 1633
|
| 11457 |
+
},
|
| 11458 |
+
{
|
| 11459 |
+
"epoch": 2.071111111111111,
|
| 11460 |
+
"grad_norm": 0.9453125,
|
| 11461 |
+
"learning_rate": 3.4639259199415767e-06,
|
| 11462 |
+
"loss": 1.1166,
|
| 11463 |
+
"step": 1634
|
| 11464 |
+
},
|
| 11465 |
+
{
|
| 11466 |
+
"epoch": 2.072380952380952,
|
| 11467 |
+
"grad_norm": 0.96875,
|
| 11468 |
+
"learning_rate": 3.4603186594820627e-06,
|
| 11469 |
+
"loss": 1.0351,
|
| 11470 |
+
"step": 1635
|
| 11471 |
+
},
|
| 11472 |
+
{
|
| 11473 |
+
"epoch": 2.0736507936507937,
|
| 11474 |
+
"grad_norm": 0.98046875,
|
| 11475 |
+
"learning_rate": 3.45671144117823e-06,
|
| 11476 |
+
"loss": 1.1471,
|
| 11477 |
+
"step": 1636
|
| 11478 |
+
},
|
| 11479 |
+
{
|
| 11480 |
+
"epoch": 2.074920634920635,
|
| 11481 |
+
"grad_norm": 0.9140625,
|
| 11482 |
+
"learning_rate": 3.453104268862226e-06,
|
| 11483 |
+
"loss": 1.0593,
|
| 11484 |
+
"step": 1637
|
| 11485 |
+
},
|
| 11486 |
+
{
|
| 11487 |
+
"epoch": 2.0761904761904764,
|
| 11488 |
+
"grad_norm": 0.96484375,
|
| 11489 |
+
"learning_rate": 3.4494971463661514e-06,
|
| 11490 |
+
"loss": 1.1042,
|
| 11491 |
+
"step": 1638
|
| 11492 |
+
},
|
| 11493 |
+
{
|
| 11494 |
+
"epoch": 2.0774603174603175,
|
| 11495 |
+
"grad_norm": 0.94140625,
|
| 11496 |
+
"learning_rate": 3.4458900775220487e-06,
|
| 11497 |
+
"loss": 1.0667,
|
| 11498 |
+
"step": 1639
|
| 11499 |
+
},
|
| 11500 |
+
{
|
| 11501 |
+
"epoch": 2.0787301587301585,
|
| 11502 |
+
"grad_norm": 0.921875,
|
| 11503 |
+
"learning_rate": 3.4422830661619072e-06,
|
| 11504 |
+
"loss": 1.0711,
|
| 11505 |
+
"step": 1640
|
| 11506 |
+
},
|
| 11507 |
+
{
|
| 11508 |
+
"epoch": 2.08,
|
| 11509 |
+
"grad_norm": 0.96484375,
|
| 11510 |
+
"learning_rate": 3.438676116117657e-06,
|
| 11511 |
+
"loss": 1.1181,
|
| 11512 |
+
"step": 1641
|
| 11513 |
+
},
|
| 11514 |
+
{
|
| 11515 |
+
"epoch": 2.081269841269841,
|
| 11516 |
+
"grad_norm": 0.9296875,
|
| 11517 |
+
"learning_rate": 3.435069231221158e-06,
|
| 11518 |
+
"loss": 1.0383,
|
| 11519 |
+
"step": 1642
|
| 11520 |
+
},
|
| 11521 |
+
{
|
| 11522 |
+
"epoch": 2.0825396825396827,
|
| 11523 |
+
"grad_norm": 0.9453125,
|
| 11524 |
+
"learning_rate": 3.431462415304204e-06,
|
| 11525 |
+
"loss": 1.0495,
|
| 11526 |
+
"step": 1643
|
| 11527 |
+
},
|
| 11528 |
+
{
|
| 11529 |
+
"epoch": 2.083809523809524,
|
| 11530 |
+
"grad_norm": 1.0,
|
| 11531 |
+
"learning_rate": 3.4278556721985157e-06,
|
| 11532 |
+
"loss": 1.0865,
|
| 11533 |
+
"step": 1644
|
| 11534 |
+
},
|
| 11535 |
+
{
|
| 11536 |
+
"epoch": 2.085079365079365,
|
| 11537 |
+
"grad_norm": 0.9453125,
|
| 11538 |
+
"learning_rate": 3.424249005735734e-06,
|
| 11539 |
+
"loss": 1.0705,
|
| 11540 |
+
"step": 1645
|
| 11541 |
+
},
|
| 11542 |
+
{
|
| 11543 |
+
"epoch": 2.0863492063492064,
|
| 11544 |
+
"grad_norm": 0.9921875,
|
| 11545 |
+
"learning_rate": 3.420642419747422e-06,
|
| 11546 |
+
"loss": 1.0912,
|
| 11547 |
+
"step": 1646
|
| 11548 |
+
},
|
| 11549 |
+
{
|
| 11550 |
+
"epoch": 2.0876190476190475,
|
| 11551 |
+
"grad_norm": 0.98828125,
|
| 11552 |
+
"learning_rate": 3.4170359180650556e-06,
|
| 11553 |
+
"loss": 1.0855,
|
| 11554 |
+
"step": 1647
|
| 11555 |
+
},
|
| 11556 |
+
{
|
| 11557 |
+
"epoch": 2.088888888888889,
|
| 11558 |
+
"grad_norm": 1.0078125,
|
| 11559 |
+
"learning_rate": 3.4134295045200173e-06,
|
| 11560 |
+
"loss": 1.1909,
|
| 11561 |
+
"step": 1648
|
| 11562 |
+
},
|
| 11563 |
+
{
|
| 11564 |
+
"epoch": 2.09015873015873,
|
| 11565 |
+
"grad_norm": 0.98828125,
|
| 11566 |
+
"learning_rate": 3.4098231829436036e-06,
|
| 11567 |
+
"loss": 1.1589,
|
| 11568 |
+
"step": 1649
|
| 11569 |
+
},
|
| 11570 |
+
{
|
| 11571 |
+
"epoch": 2.0914285714285716,
|
| 11572 |
+
"grad_norm": 0.94921875,
|
| 11573 |
+
"learning_rate": 3.4062169571670087e-06,
|
| 11574 |
+
"loss": 1.1057,
|
| 11575 |
+
"step": 1650
|
| 11576 |
+
},
|
| 11577 |
+
{
|
| 11578 |
+
"epoch": 2.0926984126984127,
|
| 11579 |
+
"grad_norm": 1.0859375,
|
| 11580 |
+
"learning_rate": 3.402610831021324e-06,
|
| 11581 |
+
"loss": 1.1359,
|
| 11582 |
+
"step": 1651
|
| 11583 |
+
},
|
| 11584 |
+
{
|
| 11585 |
+
"epoch": 2.093968253968254,
|
| 11586 |
+
"grad_norm": 1.046875,
|
| 11587 |
+
"learning_rate": 3.3990048083375368e-06,
|
| 11588 |
+
"loss": 1.145,
|
| 11589 |
+
"step": 1652
|
| 11590 |
+
},
|
| 11591 |
+
{
|
| 11592 |
+
"epoch": 2.0952380952380953,
|
| 11593 |
+
"grad_norm": 1.03125,
|
| 11594 |
+
"learning_rate": 3.395398892946526e-06,
|
| 11595 |
+
"loss": 1.1437,
|
| 11596 |
+
"step": 1653
|
| 11597 |
+
},
|
| 11598 |
+
{
|
| 11599 |
+
"epoch": 2.0965079365079364,
|
| 11600 |
+
"grad_norm": 1.0,
|
| 11601 |
+
"learning_rate": 3.391793088679053e-06,
|
| 11602 |
+
"loss": 1.1066,
|
| 11603 |
+
"step": 1654
|
| 11604 |
+
},
|
| 11605 |
+
{
|
| 11606 |
+
"epoch": 2.097777777777778,
|
| 11607 |
+
"grad_norm": 0.94140625,
|
| 11608 |
+
"learning_rate": 3.3881873993657634e-06,
|
| 11609 |
+
"loss": 1.1463,
|
| 11610 |
+
"step": 1655
|
| 11611 |
+
},
|
| 11612 |
+
{
|
| 11613 |
+
"epoch": 2.099047619047619,
|
| 11614 |
+
"grad_norm": 0.90625,
|
| 11615 |
+
"learning_rate": 3.384581828837181e-06,
|
| 11616 |
+
"loss": 1.0738,
|
| 11617 |
+
"step": 1656
|
| 11618 |
+
},
|
| 11619 |
+
{
|
| 11620 |
+
"epoch": 2.10031746031746,
|
| 11621 |
+
"grad_norm": 1.015625,
|
| 11622 |
+
"learning_rate": 3.380976380923703e-06,
|
| 11623 |
+
"loss": 1.1684,
|
| 11624 |
+
"step": 1657
|
| 11625 |
+
},
|
| 11626 |
+
{
|
| 11627 |
+
"epoch": 2.1015873015873017,
|
| 11628 |
+
"grad_norm": 1.0703125,
|
| 11629 |
+
"learning_rate": 3.3773710594555947e-06,
|
| 11630 |
+
"loss": 1.1076,
|
| 11631 |
+
"step": 1658
|
| 11632 |
+
},
|
| 11633 |
+
{
|
| 11634 |
+
"epoch": 2.1028571428571428,
|
| 11635 |
+
"grad_norm": 0.96875,
|
| 11636 |
+
"learning_rate": 3.373765868262989e-06,
|
| 11637 |
+
"loss": 1.1151,
|
| 11638 |
+
"step": 1659
|
| 11639 |
+
},
|
| 11640 |
+
{
|
| 11641 |
+
"epoch": 2.1041269841269843,
|
| 11642 |
+
"grad_norm": 1.0390625,
|
| 11643 |
+
"learning_rate": 3.370160811175882e-06,
|
| 11644 |
+
"loss": 1.0252,
|
| 11645 |
+
"step": 1660
|
| 11646 |
+
},
|
| 11647 |
+
{
|
| 11648 |
+
"epoch": 2.1053968253968254,
|
| 11649 |
+
"grad_norm": 1.078125,
|
| 11650 |
+
"learning_rate": 3.3665558920241216e-06,
|
| 11651 |
+
"loss": 1.0858,
|
| 11652 |
+
"step": 1661
|
| 11653 |
+
},
|
| 11654 |
+
{
|
| 11655 |
+
"epoch": 2.1066666666666665,
|
| 11656 |
+
"grad_norm": 1.0625,
|
| 11657 |
+
"learning_rate": 3.362951114637413e-06,
|
| 11658 |
+
"loss": 1.0822,
|
| 11659 |
+
"step": 1662
|
| 11660 |
+
},
|
| 11661 |
+
{
|
| 11662 |
+
"epoch": 2.107936507936508,
|
| 11663 |
+
"grad_norm": 0.99609375,
|
| 11664 |
+
"learning_rate": 3.359346482845314e-06,
|
| 11665 |
+
"loss": 1.0881,
|
| 11666 |
+
"step": 1663
|
| 11667 |
+
},
|
| 11668 |
+
{
|
| 11669 |
+
"epoch": 2.109206349206349,
|
| 11670 |
+
"grad_norm": 0.94140625,
|
| 11671 |
+
"learning_rate": 3.3557420004772203e-06,
|
| 11672 |
+
"loss": 1.0764,
|
| 11673 |
+
"step": 1664
|
| 11674 |
+
},
|
| 11675 |
+
{
|
| 11676 |
+
"epoch": 2.1104761904761906,
|
| 11677 |
+
"grad_norm": 1.0078125,
|
| 11678 |
+
"learning_rate": 3.3521376713623743e-06,
|
| 11679 |
+
"loss": 1.0295,
|
| 11680 |
+
"step": 1665
|
| 11681 |
+
},
|
| 11682 |
+
{
|
| 11683 |
+
"epoch": 2.1117460317460317,
|
| 11684 |
+
"grad_norm": 1.0078125,
|
| 11685 |
+
"learning_rate": 3.3485334993298552e-06,
|
| 11686 |
+
"loss": 1.0734,
|
| 11687 |
+
"step": 1666
|
| 11688 |
+
},
|
| 11689 |
+
{
|
| 11690 |
+
"epoch": 2.113015873015873,
|
| 11691 |
+
"grad_norm": 0.96484375,
|
| 11692 |
+
"learning_rate": 3.3449294882085723e-06,
|
| 11693 |
+
"loss": 1.0289,
|
| 11694 |
+
"step": 1667
|
| 11695 |
+
},
|
| 11696 |
+
{
|
| 11697 |
+
"epoch": 2.1142857142857143,
|
| 11698 |
+
"grad_norm": 0.96875,
|
| 11699 |
+
"learning_rate": 3.341325641827267e-06,
|
| 11700 |
+
"loss": 1.1468,
|
| 11701 |
+
"step": 1668
|
| 11702 |
+
},
|
| 11703 |
+
{
|
| 11704 |
+
"epoch": 2.1155555555555554,
|
| 11705 |
+
"grad_norm": 1.0,
|
| 11706 |
+
"learning_rate": 3.3377219640145053e-06,
|
| 11707 |
+
"loss": 1.1006,
|
| 11708 |
+
"step": 1669
|
| 11709 |
+
},
|
| 11710 |
+
{
|
| 11711 |
+
"epoch": 2.116825396825397,
|
| 11712 |
+
"grad_norm": 0.93359375,
|
| 11713 |
+
"learning_rate": 3.334118458598671e-06,
|
| 11714 |
+
"loss": 1.1155,
|
| 11715 |
+
"step": 1670
|
| 11716 |
+
},
|
| 11717 |
+
{
|
| 11718 |
+
"epoch": 2.118095238095238,
|
| 11719 |
+
"grad_norm": 1.0546875,
|
| 11720 |
+
"learning_rate": 3.3305151294079673e-06,
|
| 11721 |
+
"loss": 1.1421,
|
| 11722 |
+
"step": 1671
|
| 11723 |
+
},
|
| 11724 |
+
{
|
| 11725 |
+
"epoch": 2.1193650793650796,
|
| 11726 |
+
"grad_norm": 0.98046875,
|
| 11727 |
+
"learning_rate": 3.326911980270414e-06,
|
| 11728 |
+
"loss": 1.1595,
|
| 11729 |
+
"step": 1672
|
| 11730 |
+
},
|
| 11731 |
+
{
|
| 11732 |
+
"epoch": 2.1206349206349207,
|
| 11733 |
+
"grad_norm": 1.0234375,
|
| 11734 |
+
"learning_rate": 3.323309015013831e-06,
|
| 11735 |
+
"loss": 1.1544,
|
| 11736 |
+
"step": 1673
|
| 11737 |
+
},
|
| 11738 |
+
{
|
| 11739 |
+
"epoch": 2.1219047619047617,
|
| 11740 |
+
"grad_norm": 0.953125,
|
| 11741 |
+
"learning_rate": 3.319706237465849e-06,
|
| 11742 |
+
"loss": 1.0962,
|
| 11743 |
+
"step": 1674
|
| 11744 |
+
},
|
| 11745 |
+
{
|
| 11746 |
+
"epoch": 2.1231746031746033,
|
| 11747 |
+
"grad_norm": 0.9921875,
|
| 11748 |
+
"learning_rate": 3.3161036514538992e-06,
|
| 11749 |
+
"loss": 1.1188,
|
| 11750 |
+
"step": 1675
|
| 11751 |
+
},
|
| 11752 |
+
{
|
| 11753 |
+
"epoch": 2.1244444444444444,
|
| 11754 |
+
"grad_norm": 0.98046875,
|
| 11755 |
+
"learning_rate": 3.3125012608052056e-06,
|
| 11756 |
+
"loss": 1.1019,
|
| 11757 |
+
"step": 1676
|
| 11758 |
+
},
|
| 11759 |
+
{
|
| 11760 |
+
"epoch": 2.125714285714286,
|
| 11761 |
+
"grad_norm": 0.9375,
|
| 11762 |
+
"learning_rate": 3.308899069346788e-06,
|
| 11763 |
+
"loss": 1.0536,
|
| 11764 |
+
"step": 1677
|
| 11765 |
+
},
|
| 11766 |
+
{
|
| 11767 |
+
"epoch": 2.126984126984127,
|
| 11768 |
+
"grad_norm": 1.0078125,
|
| 11769 |
+
"learning_rate": 3.305297080905455e-06,
|
| 11770 |
+
"loss": 1.1097,
|
| 11771 |
+
"step": 1678
|
| 11772 |
+
},
|
| 11773 |
+
{
|
| 11774 |
+
"epoch": 2.128253968253968,
|
| 11775 |
+
"grad_norm": 1.0078125,
|
| 11776 |
+
"learning_rate": 3.301695299307794e-06,
|
| 11777 |
+
"loss": 1.1417,
|
| 11778 |
+
"step": 1679
|
| 11779 |
+
},
|
| 11780 |
+
{
|
| 11781 |
+
"epoch": 2.1295238095238096,
|
| 11782 |
+
"grad_norm": 0.99609375,
|
| 11783 |
+
"learning_rate": 3.2980937283801805e-06,
|
| 11784 |
+
"loss": 1.0972,
|
| 11785 |
+
"step": 1680
|
| 11786 |
+
},
|
| 11787 |
+
{
|
| 11788 |
+
"epoch": 2.1307936507936507,
|
| 11789 |
+
"grad_norm": 0.98046875,
|
| 11790 |
+
"learning_rate": 3.2944923719487624e-06,
|
| 11791 |
+
"loss": 1.0071,
|
| 11792 |
+
"step": 1681
|
| 11793 |
+
},
|
| 11794 |
+
{
|
| 11795 |
+
"epoch": 2.132063492063492,
|
| 11796 |
+
"grad_norm": 0.9609375,
|
| 11797 |
+
"learning_rate": 3.290891233839457e-06,
|
| 11798 |
+
"loss": 1.0581,
|
| 11799 |
+
"step": 1682
|
| 11800 |
+
},
|
| 11801 |
+
{
|
| 11802 |
+
"epoch": 2.1333333333333333,
|
| 11803 |
+
"grad_norm": 0.921875,
|
| 11804 |
+
"learning_rate": 3.287290317877954e-06,
|
| 11805 |
+
"loss": 1.0501,
|
| 11806 |
+
"step": 1683
|
| 11807 |
+
},
|
| 11808 |
+
{
|
| 11809 |
+
"epoch": 2.134603174603175,
|
| 11810 |
+
"grad_norm": 0.99609375,
|
| 11811 |
+
"learning_rate": 3.2836896278897062e-06,
|
| 11812 |
+
"loss": 1.0731,
|
| 11813 |
+
"step": 1684
|
| 11814 |
+
},
|
| 11815 |
+
{
|
| 11816 |
+
"epoch": 2.135873015873016,
|
| 11817 |
+
"grad_norm": 1.0,
|
| 11818 |
+
"learning_rate": 3.280089167699924e-06,
|
| 11819 |
+
"loss": 1.127,
|
| 11820 |
+
"step": 1685
|
| 11821 |
+
},
|
| 11822 |
+
{
|
| 11823 |
+
"epoch": 2.137142857142857,
|
| 11824 |
+
"grad_norm": 0.94140625,
|
| 11825 |
+
"learning_rate": 3.2764889411335757e-06,
|
| 11826 |
+
"loss": 1.0615,
|
| 11827 |
+
"step": 1686
|
| 11828 |
+
},
|
| 11829 |
+
{
|
| 11830 |
+
"epoch": 2.1384126984126985,
|
| 11831 |
+
"grad_norm": 1.015625,
|
| 11832 |
+
"learning_rate": 3.2728889520153823e-06,
|
| 11833 |
+
"loss": 1.1027,
|
| 11834 |
+
"step": 1687
|
| 11835 |
+
},
|
| 11836 |
+
{
|
| 11837 |
+
"epoch": 2.1396825396825396,
|
| 11838 |
+
"grad_norm": 1.0390625,
|
| 11839 |
+
"learning_rate": 3.26928920416981e-06,
|
| 11840 |
+
"loss": 1.1408,
|
| 11841 |
+
"step": 1688
|
| 11842 |
+
},
|
| 11843 |
+
{
|
| 11844 |
+
"epoch": 2.140952380952381,
|
| 11845 |
+
"grad_norm": 0.98046875,
|
| 11846 |
+
"learning_rate": 3.26568970142107e-06,
|
| 11847 |
+
"loss": 1.1034,
|
| 11848 |
+
"step": 1689
|
| 11849 |
+
},
|
| 11850 |
+
{
|
| 11851 |
+
"epoch": 2.1422222222222222,
|
| 11852 |
+
"grad_norm": 1.0859375,
|
| 11853 |
+
"learning_rate": 3.262090447593115e-06,
|
| 11854 |
+
"loss": 1.0953,
|
| 11855 |
+
"step": 1690
|
| 11856 |
+
},
|
| 11857 |
+
{
|
| 11858 |
+
"epoch": 2.1434920634920633,
|
| 11859 |
+
"grad_norm": 0.96484375,
|
| 11860 |
+
"learning_rate": 3.2584914465096283e-06,
|
| 11861 |
+
"loss": 1.0978,
|
| 11862 |
+
"step": 1691
|
| 11863 |
+
},
|
| 11864 |
+
{
|
| 11865 |
+
"epoch": 2.144761904761905,
|
| 11866 |
+
"grad_norm": 1.0390625,
|
| 11867 |
+
"learning_rate": 3.254892701994029e-06,
|
| 11868 |
+
"loss": 1.1599,
|
| 11869 |
+
"step": 1692
|
| 11870 |
+
},
|
| 11871 |
+
{
|
| 11872 |
+
"epoch": 2.146031746031746,
|
| 11873 |
+
"grad_norm": 1.09375,
|
| 11874 |
+
"learning_rate": 3.2512942178694626e-06,
|
| 11875 |
+
"loss": 1.1698,
|
| 11876 |
+
"step": 1693
|
| 11877 |
+
},
|
| 11878 |
+
{
|
| 11879 |
+
"epoch": 2.1473015873015875,
|
| 11880 |
+
"grad_norm": 0.94921875,
|
| 11881 |
+
"learning_rate": 3.2476959979587977e-06,
|
| 11882 |
+
"loss": 1.0336,
|
| 11883 |
+
"step": 1694
|
| 11884 |
+
},
|
| 11885 |
+
{
|
| 11886 |
+
"epoch": 2.1485714285714286,
|
| 11887 |
+
"grad_norm": 0.921875,
|
| 11888 |
+
"learning_rate": 3.2440980460846214e-06,
|
| 11889 |
+
"loss": 1.0691,
|
| 11890 |
+
"step": 1695
|
| 11891 |
+
},
|
| 11892 |
+
{
|
| 11893 |
+
"epoch": 2.1498412698412697,
|
| 11894 |
+
"grad_norm": 1.0,
|
| 11895 |
+
"learning_rate": 3.240500366069239e-06,
|
| 11896 |
+
"loss": 1.1295,
|
| 11897 |
+
"step": 1696
|
| 11898 |
+
},
|
| 11899 |
+
{
|
| 11900 |
+
"epoch": 2.151111111111111,
|
| 11901 |
+
"grad_norm": 1.0234375,
|
| 11902 |
+
"learning_rate": 3.2369029617346617e-06,
|
| 11903 |
+
"loss": 1.1279,
|
| 11904 |
+
"step": 1697
|
| 11905 |
+
},
|
| 11906 |
+
{
|
| 11907 |
+
"epoch": 2.1523809523809523,
|
| 11908 |
+
"grad_norm": 0.9765625,
|
| 11909 |
+
"learning_rate": 3.2333058369026136e-06,
|
| 11910 |
+
"loss": 1.1438,
|
| 11911 |
+
"step": 1698
|
| 11912 |
+
},
|
| 11913 |
+
{
|
| 11914 |
+
"epoch": 2.153650793650794,
|
| 11915 |
+
"grad_norm": 0.9140625,
|
| 11916 |
+
"learning_rate": 3.2297089953945195e-06,
|
| 11917 |
+
"loss": 1.0495,
|
| 11918 |
+
"step": 1699
|
| 11919 |
+
},
|
| 11920 |
+
{
|
| 11921 |
+
"epoch": 2.154920634920635,
|
| 11922 |
+
"grad_norm": 0.98828125,
|
| 11923 |
+
"learning_rate": 3.2261124410315003e-06,
|
| 11924 |
+
"loss": 1.1117,
|
| 11925 |
+
"step": 1700
|
| 11926 |
+
},
|
| 11927 |
+
{
|
| 11928 |
+
"epoch": 2.156190476190476,
|
| 11929 |
+
"grad_norm": 0.9609375,
|
| 11930 |
+
"learning_rate": 3.222516177634376e-06,
|
| 11931 |
+
"loss": 1.0946,
|
| 11932 |
+
"step": 1701
|
| 11933 |
+
},
|
| 11934 |
+
{
|
| 11935 |
+
"epoch": 2.1574603174603175,
|
| 11936 |
+
"grad_norm": 0.96484375,
|
| 11937 |
+
"learning_rate": 3.2189202090236574e-06,
|
| 11938 |
+
"loss": 1.0813,
|
| 11939 |
+
"step": 1702
|
| 11940 |
+
},
|
| 11941 |
+
{
|
| 11942 |
+
"epoch": 2.1587301587301586,
|
| 11943 |
+
"grad_norm": 1.0234375,
|
| 11944 |
+
"learning_rate": 3.2153245390195386e-06,
|
| 11945 |
+
"loss": 1.1138,
|
| 11946 |
+
"step": 1703
|
| 11947 |
+
},
|
| 11948 |
+
{
|
| 11949 |
+
"epoch": 2.16,
|
| 11950 |
+
"grad_norm": 0.9140625,
|
| 11951 |
+
"learning_rate": 3.211729171441899e-06,
|
| 11952 |
+
"loss": 1.0996,
|
| 11953 |
+
"step": 1704
|
| 11954 |
+
},
|
| 11955 |
+
{
|
| 11956 |
+
"epoch": 2.1612698412698412,
|
| 11957 |
+
"grad_norm": 1.0234375,
|
| 11958 |
+
"learning_rate": 3.2081341101102978e-06,
|
| 11959 |
+
"loss": 1.1335,
|
| 11960 |
+
"step": 1705
|
| 11961 |
+
},
|
| 11962 |
+
{
|
| 11963 |
+
"epoch": 2.1625396825396823,
|
| 11964 |
+
"grad_norm": 0.9453125,
|
| 11965 |
+
"learning_rate": 3.2045393588439657e-06,
|
| 11966 |
+
"loss": 1.1013,
|
| 11967 |
+
"step": 1706
|
| 11968 |
+
},
|
| 11969 |
+
{
|
| 11970 |
+
"epoch": 2.163809523809524,
|
| 11971 |
+
"grad_norm": 0.90625,
|
| 11972 |
+
"learning_rate": 3.2009449214618065e-06,
|
| 11973 |
+
"loss": 1.0708,
|
| 11974 |
+
"step": 1707
|
| 11975 |
+
},
|
| 11976 |
+
{
|
| 11977 |
+
"epoch": 2.165079365079365,
|
| 11978 |
+
"grad_norm": 0.90625,
|
| 11979 |
+
"learning_rate": 3.197350801782391e-06,
|
| 11980 |
+
"loss": 1.0554,
|
| 11981 |
+
"step": 1708
|
| 11982 |
+
},
|
| 11983 |
+
{
|
| 11984 |
+
"epoch": 2.1663492063492065,
|
| 11985 |
+
"grad_norm": 0.9765625,
|
| 11986 |
+
"learning_rate": 3.1937570036239484e-06,
|
| 11987 |
+
"loss": 1.1047,
|
| 11988 |
+
"step": 1709
|
| 11989 |
+
},
|
| 11990 |
+
{
|
| 11991 |
+
"epoch": 2.1676190476190476,
|
| 11992 |
+
"grad_norm": 0.96875,
|
| 11993 |
+
"learning_rate": 3.1901635308043696e-06,
|
| 11994 |
+
"loss": 1.1375,
|
| 11995 |
+
"step": 1710
|
| 11996 |
+
},
|
| 11997 |
+
{
|
| 11998 |
+
"epoch": 2.168888888888889,
|
| 11999 |
+
"grad_norm": 0.9765625,
|
| 12000 |
+
"learning_rate": 3.1865703871412033e-06,
|
| 12001 |
+
"loss": 1.1244,
|
| 12002 |
+
"step": 1711
|
| 12003 |
+
},
|
| 12004 |
+
{
|
| 12005 |
+
"epoch": 2.17015873015873,
|
| 12006 |
+
"grad_norm": 0.953125,
|
| 12007 |
+
"learning_rate": 3.18297757645164e-06,
|
| 12008 |
+
"loss": 1.0371,
|
| 12009 |
+
"step": 1712
|
| 12010 |
+
},
|
| 12011 |
+
{
|
| 12012 |
+
"epoch": 2.1714285714285713,
|
| 12013 |
+
"grad_norm": 0.8828125,
|
| 12014 |
+
"learning_rate": 3.179385102552523e-06,
|
| 12015 |
+
"loss": 1.1099,
|
| 12016 |
+
"step": 1713
|
| 12017 |
+
},
|
| 12018 |
+
{
|
| 12019 |
+
"epoch": 2.172698412698413,
|
| 12020 |
+
"grad_norm": 1.015625,
|
| 12021 |
+
"learning_rate": 3.175792969260336e-06,
|
| 12022 |
+
"loss": 1.1233,
|
| 12023 |
+
"step": 1714
|
| 12024 |
+
},
|
| 12025 |
+
{
|
| 12026 |
+
"epoch": 2.173968253968254,
|
| 12027 |
+
"grad_norm": 0.98046875,
|
| 12028 |
+
"learning_rate": 3.1722011803912004e-06,
|
| 12029 |
+
"loss": 1.1149,
|
| 12030 |
+
"step": 1715
|
| 12031 |
+
},
|
| 12032 |
+
{
|
| 12033 |
+
"epoch": 2.1752380952380954,
|
| 12034 |
+
"grad_norm": 1.0390625,
|
| 12035 |
+
"learning_rate": 3.1686097397608727e-06,
|
| 12036 |
+
"loss": 1.1355,
|
| 12037 |
+
"step": 1716
|
| 12038 |
+
},
|
| 12039 |
+
{
|
| 12040 |
+
"epoch": 2.1765079365079365,
|
| 12041 |
+
"grad_norm": 0.96875,
|
| 12042 |
+
"learning_rate": 3.1650186511847393e-06,
|
| 12043 |
+
"loss": 1.0709,
|
| 12044 |
+
"step": 1717
|
| 12045 |
+
},
|
| 12046 |
+
{
|
| 12047 |
+
"epoch": 2.1777777777777776,
|
| 12048 |
+
"grad_norm": 0.953125,
|
| 12049 |
+
"learning_rate": 3.1614279184778094e-06,
|
| 12050 |
+
"loss": 1.1508,
|
| 12051 |
+
"step": 1718
|
| 12052 |
+
},
|
| 12053 |
+
{
|
| 12054 |
+
"epoch": 2.179047619047619,
|
| 12055 |
+
"grad_norm": 0.96484375,
|
| 12056 |
+
"learning_rate": 3.1578375454547193e-06,
|
| 12057 |
+
"loss": 1.1545,
|
| 12058 |
+
"step": 1719
|
| 12059 |
+
},
|
| 12060 |
+
{
|
| 12061 |
+
"epoch": 2.18031746031746,
|
| 12062 |
+
"grad_norm": 0.90234375,
|
| 12063 |
+
"learning_rate": 3.15424753592972e-06,
|
| 12064 |
+
"loss": 1.0869,
|
| 12065 |
+
"step": 1720
|
| 12066 |
+
},
|
| 12067 |
+
{
|
| 12068 |
+
"epoch": 2.1815873015873017,
|
| 12069 |
+
"grad_norm": 1.0390625,
|
| 12070 |
+
"learning_rate": 3.1506578937166786e-06,
|
| 12071 |
+
"loss": 1.131,
|
| 12072 |
+
"step": 1721
|
| 12073 |
+
},
|
| 12074 |
+
{
|
| 12075 |
+
"epoch": 2.182857142857143,
"grad_norm": 1.0,
"learning_rate": 3.147068622629068e-06,
"loss": 1.0826,
"step": 1722
},
... (added log entries for steps 1723-1969 continue in the same one-object-per-step format, each recording "epoch", "grad_norm", "learning_rate", "loss" and "step") ...
{
"epoch": 2.497777777777778,
"grad_norm": 1.0390625,
"learning_rate": 2.27810455538833e-06,
"loss": 1.0855,
"step": 1970
}
],
"logging_steps": 1,
...
"attributes": {}
}
},
"total_flos": 4.362570197203157e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
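To inspect these training logs without reading the raw diff, a minimal Python sketch is given below. It assumes only the layout visible above: trainer_state.json holds a "log_history" list of per-step dicts with "epoch", "grad_norm", "learning_rate", "loss" and "step", plus top-level fields such as "total_flos" and "train_batch_size". The local file path is an assumption (a downloaded copy of the uploaded file).

```python
# Minimal sketch (not part of the upload): summarise the per-step entries shown
# in the trainer_state.json diff above. Assumes the layout visible there:
# a "log_history" list of dicts with "epoch", "grad_norm", "learning_rate",
# "loss" and "step". The local path is an assumption.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step training logs (entries that carry a loss value).
logs = [e for e in state["log_history"] if "loss" in e]

print(f"steps {logs[0]['step']}..{logs[-1]['step']}, "
      f"epoch {logs[0]['epoch']:.3f}..{logs[-1]['epoch']:.3f}")
print(f"final loss {logs[-1]['loss']}, final lr {logs[-1]['learning_rate']:.3e}")
print(f"total_flos {state['total_flos']:.3e}, batch size {state['train_batch_size']}")

# Coarse view of the loss curve: mean loss over blocks of 50 logged steps.
for i in range(0, len(logs), 50):
    block = logs[i:i + 50]
    mean_loss = sum(e["loss"] for e in block) / len(block)
    print(f"steps {block[0]['step']:>5}-{block[-1]['step']:>5}  mean loss {mean_loss:.4f}")
```

On the full uploaded file this covers every logged step, not only the entries added in this commit (which span steps 1722-1970 here).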