Training in progress, step 40000, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +3511 -3
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 306619286
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ecd57c7eace6adeec4710fec99cac2074403be145022a172c36c987b89390434
|
| 3 |
size 306619286
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 919972410
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51104d0f8c0c31f4a4bccea6f88abc44b2a966c89edf808f6831a68d92b864e7
|
| 3 |
size 919972410
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e735ed11597ed40a2b6854e0229902e1a21fedc0a0dbc608ca905fae57d5b06b
|
| 3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ba3815fc0953b1b7f08cea092dfc0a62c4bbc2a2c68780d3f4dd0b5e22582a7
|
| 3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:647ac15563fcad903adbb616e9b2c36b237a3ed5939d088620212da969930f6c
|
| 3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93e3733c5b180986b7efbec17b663bf5231343d187374d184768fcd913797167
|
| 3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9820ea4fec1b01f3da091290c3e8b5ddb86a3a3fa17285c248b64910c2d0b4f0
|
| 3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7413035def085e41776a629afc94fc24fe5a955f1ad83b32f9b370ab60f9a18d
|
| 3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91e3953bcbf4089415abffbd914fbbe4580121f6c843eabbf70624c5ed144814
|
| 3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:369fde7bff4dfc0d6b9cf773cf9b0352696083f84763999e05a631ee6d52c5e3
|
| 3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5802fc71a5ce36cca3a7f664b0fbad4d08efb9895a1c5eaa5692a421831e9c11
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 5000,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -24563,6 +24563,3514 @@
|
|
| 24563 |
"eval_samples_per_second": 3187.082,
|
| 24564 |
"eval_steps_per_second": 49.8,
|
| 24565 |
"step": 35000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24566 |
}
|
| 24567 |
],
|
| 24568 |
"logging_steps": 10,
|
|
@@ -24582,7 +28090,7 @@
|
|
| 24582 |
"attributes": {}
|
| 24583 |
}
|
| 24584 |
},
|
| 24585 |
-
"total_flos": 6.
|
| 24586 |
"train_batch_size": 8,
|
| 24587 |
"trial_name": null,
|
| 24588 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.7422923379193082,
|
| 5 |
"eval_steps": 5000,
|
| 6 |
+
"global_step": 40000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 24563 |
"eval_samples_per_second": 3187.082,
|
| 24564 |
"eval_steps_per_second": 49.8,
|
| 24565 |
"step": 35000
|
| 24566 |
+
},
|
| 24567 |
+
{
|
| 24568 |
+
"epoch": 0.6496913687638745,
|
| 24569 |
+
"grad_norm": 35.21875,
|
| 24570 |
+
"learning_rate": 9.898485753065508e-06,
|
| 24571 |
+
"loss": 19.7997,
|
| 24572 |
+
"step": 35010
|
| 24573 |
+
},
|
| 24574 |
+
{
|
| 24575 |
+
"epoch": 0.6498769418483543,
|
| 24576 |
+
"grad_norm": 37.65625,
|
| 24577 |
+
"learning_rate": 9.898456757279466e-06,
|
| 24578 |
+
"loss": 20.4342,
|
| 24579 |
+
"step": 35020
|
| 24580 |
+
},
|
| 24581 |
+
{
|
| 24582 |
+
"epoch": 0.6500625149328342,
|
| 24583 |
+
"grad_norm": 34.59375,
|
| 24584 |
+
"learning_rate": 9.898427761493423e-06,
|
| 24585 |
+
"loss": 19.5847,
|
| 24586 |
+
"step": 35030
|
| 24587 |
+
},
|
| 24588 |
+
{
|
| 24589 |
+
"epoch": 0.6502480880173139,
|
| 24590 |
+
"grad_norm": 35.5,
|
| 24591 |
+
"learning_rate": 9.89839876570738e-06,
|
| 24592 |
+
"loss": 19.8744,
|
| 24593 |
+
"step": 35040
|
| 24594 |
+
},
|
| 24595 |
+
{
|
| 24596 |
+
"epoch": 0.6504336611017938,
|
| 24597 |
+
"grad_norm": 36.15625,
|
| 24598 |
+
"learning_rate": 9.898369769921338e-06,
|
| 24599 |
+
"loss": 19.74,
|
| 24600 |
+
"step": 35050
|
| 24601 |
+
},
|
| 24602 |
+
{
|
| 24603 |
+
"epoch": 0.6506192341862737,
|
| 24604 |
+
"grad_norm": 36.96875,
|
| 24605 |
+
"learning_rate": 9.898340774135295e-06,
|
| 24606 |
+
"loss": 19.1062,
|
| 24607 |
+
"step": 35060
|
| 24608 |
+
},
|
| 24609 |
+
{
|
| 24610 |
+
"epoch": 0.6508048072707534,
|
| 24611 |
+
"grad_norm": 36.21875,
|
| 24612 |
+
"learning_rate": 9.898311778349253e-06,
|
| 24613 |
+
"loss": 19.9495,
|
| 24614 |
+
"step": 35070
|
| 24615 |
+
},
|
| 24616 |
+
{
|
| 24617 |
+
"epoch": 0.6509903803552333,
|
| 24618 |
+
"grad_norm": 33.96875,
|
| 24619 |
+
"learning_rate": 9.898282782563212e-06,
|
| 24620 |
+
"loss": 19.6127,
|
| 24621 |
+
"step": 35080
|
| 24622 |
+
},
|
| 24623 |
+
{
|
| 24624 |
+
"epoch": 0.6511759534397131,
|
| 24625 |
+
"grad_norm": 34.03125,
|
| 24626 |
+
"learning_rate": 9.89825378677717e-06,
|
| 24627 |
+
"loss": 19.839,
|
| 24628 |
+
"step": 35090
|
| 24629 |
+
},
|
| 24630 |
+
{
|
| 24631 |
+
"epoch": 0.6513615265241929,
|
| 24632 |
+
"grad_norm": 35.8125,
|
| 24633 |
+
"learning_rate": 9.898224790991125e-06,
|
| 24634 |
+
"loss": 19.3891,
|
| 24635 |
+
"step": 35100
|
| 24636 |
+
},
|
| 24637 |
+
{
|
| 24638 |
+
"epoch": 0.6515470996086727,
|
| 24639 |
+
"grad_norm": 37.0,
|
| 24640 |
+
"learning_rate": 9.898195795205084e-06,
|
| 24641 |
+
"loss": 19.8909,
|
| 24642 |
+
"step": 35110
|
| 24643 |
+
},
|
| 24644 |
+
{
|
| 24645 |
+
"epoch": 0.6517326726931526,
|
| 24646 |
+
"grad_norm": 33.90625,
|
| 24647 |
+
"learning_rate": 9.898166799419041e-06,
|
| 24648 |
+
"loss": 19.5183,
|
| 24649 |
+
"step": 35120
|
| 24650 |
+
},
|
| 24651 |
+
{
|
| 24652 |
+
"epoch": 0.6519182457776325,
|
| 24653 |
+
"grad_norm": 36.625,
|
| 24654 |
+
"learning_rate": 9.898137803632999e-06,
|
| 24655 |
+
"loss": 20.0471,
|
| 24656 |
+
"step": 35130
|
| 24657 |
+
},
|
| 24658 |
+
{
|
| 24659 |
+
"epoch": 0.6521038188621122,
|
| 24660 |
+
"grad_norm": 35.0,
|
| 24661 |
+
"learning_rate": 9.898108807846956e-06,
|
| 24662 |
+
"loss": 19.6311,
|
| 24663 |
+
"step": 35140
|
| 24664 |
+
},
|
| 24665 |
+
{
|
| 24666 |
+
"epoch": 0.6522893919465921,
|
| 24667 |
+
"grad_norm": 35.90625,
|
| 24668 |
+
"learning_rate": 9.898079812060914e-06,
|
| 24669 |
+
"loss": 19.8913,
|
| 24670 |
+
"step": 35150
|
| 24671 |
+
},
|
| 24672 |
+
{
|
| 24673 |
+
"epoch": 0.6524749650310719,
|
| 24674 |
+
"grad_norm": 35.5,
|
| 24675 |
+
"learning_rate": 9.898050816274871e-06,
|
| 24676 |
+
"loss": 19.7133,
|
| 24677 |
+
"step": 35160
|
| 24678 |
+
},
|
| 24679 |
+
{
|
| 24680 |
+
"epoch": 0.6526605381155517,
|
| 24681 |
+
"grad_norm": 36.65625,
|
| 24682 |
+
"learning_rate": 9.898021820488829e-06,
|
| 24683 |
+
"loss": 19.5409,
|
| 24684 |
+
"step": 35170
|
| 24685 |
+
},
|
| 24686 |
+
{
|
| 24687 |
+
"epoch": 0.6528461112000316,
|
| 24688 |
+
"grad_norm": 36.78125,
|
| 24689 |
+
"learning_rate": 9.897992824702786e-06,
|
| 24690 |
+
"loss": 19.4254,
|
| 24691 |
+
"step": 35180
|
| 24692 |
+
},
|
| 24693 |
+
{
|
| 24694 |
+
"epoch": 0.6530316842845114,
|
| 24695 |
+
"grad_norm": 36.09375,
|
| 24696 |
+
"learning_rate": 9.897963828916745e-06,
|
| 24697 |
+
"loss": 20.0565,
|
| 24698 |
+
"step": 35190
|
| 24699 |
+
},
|
| 24700 |
+
{
|
| 24701 |
+
"epoch": 0.6532172573689912,
|
| 24702 |
+
"grad_norm": 34.0,
|
| 24703 |
+
"learning_rate": 9.897934833130702e-06,
|
| 24704 |
+
"loss": 19.5167,
|
| 24705 |
+
"step": 35200
|
| 24706 |
+
},
|
| 24707 |
+
{
|
| 24708 |
+
"epoch": 0.653402830453471,
|
| 24709 |
+
"grad_norm": 36.03125,
|
| 24710 |
+
"learning_rate": 9.89790583734466e-06,
|
| 24711 |
+
"loss": 19.5704,
|
| 24712 |
+
"step": 35210
|
| 24713 |
+
},
|
| 24714 |
+
{
|
| 24715 |
+
"epoch": 0.6535884035379509,
|
| 24716 |
+
"grad_norm": 35.15625,
|
| 24717 |
+
"learning_rate": 9.897876841558617e-06,
|
| 24718 |
+
"loss": 19.4049,
|
| 24719 |
+
"step": 35220
|
| 24720 |
+
},
|
| 24721 |
+
{
|
| 24722 |
+
"epoch": 0.6537739766224306,
|
| 24723 |
+
"grad_norm": 35.78125,
|
| 24724 |
+
"learning_rate": 9.897847845772575e-06,
|
| 24725 |
+
"loss": 19.5957,
|
| 24726 |
+
"step": 35230
|
| 24727 |
+
},
|
| 24728 |
+
{
|
| 24729 |
+
"epoch": 0.6539595497069105,
|
| 24730 |
+
"grad_norm": 34.46875,
|
| 24731 |
+
"learning_rate": 9.897818849986532e-06,
|
| 24732 |
+
"loss": 19.1128,
|
| 24733 |
+
"step": 35240
|
| 24734 |
+
},
|
| 24735 |
+
{
|
| 24736 |
+
"epoch": 0.6541451227913904,
|
| 24737 |
+
"grad_norm": 36.09375,
|
| 24738 |
+
"learning_rate": 9.89778985420049e-06,
|
| 24739 |
+
"loss": 19.9392,
|
| 24740 |
+
"step": 35250
|
| 24741 |
+
},
|
| 24742 |
+
{
|
| 24743 |
+
"epoch": 0.6543306958758701,
|
| 24744 |
+
"grad_norm": 34.28125,
|
| 24745 |
+
"learning_rate": 9.897760858414447e-06,
|
| 24746 |
+
"loss": 19.5701,
|
| 24747 |
+
"step": 35260
|
| 24748 |
+
},
|
| 24749 |
+
{
|
| 24750 |
+
"epoch": 0.65451626896035,
|
| 24751 |
+
"grad_norm": 37.03125,
|
| 24752 |
+
"learning_rate": 9.897731862628404e-06,
|
| 24753 |
+
"loss": 19.6126,
|
| 24754 |
+
"step": 35270
|
| 24755 |
+
},
|
| 24756 |
+
{
|
| 24757 |
+
"epoch": 0.6547018420448298,
|
| 24758 |
+
"grad_norm": 35.25,
|
| 24759 |
+
"learning_rate": 9.897702866842362e-06,
|
| 24760 |
+
"loss": 19.5181,
|
| 24761 |
+
"step": 35280
|
| 24762 |
+
},
|
| 24763 |
+
{
|
| 24764 |
+
"epoch": 0.6548874151293096,
|
| 24765 |
+
"grad_norm": 33.96875,
|
| 24766 |
+
"learning_rate": 9.89767387105632e-06,
|
| 24767 |
+
"loss": 20.0623,
|
| 24768 |
+
"step": 35290
|
| 24769 |
+
},
|
| 24770 |
+
{
|
| 24771 |
+
"epoch": 0.6550729882137895,
|
| 24772 |
+
"grad_norm": 34.96875,
|
| 24773 |
+
"learning_rate": 9.897644875270278e-06,
|
| 24774 |
+
"loss": 19.9774,
|
| 24775 |
+
"step": 35300
|
| 24776 |
+
},
|
| 24777 |
+
{
|
| 24778 |
+
"epoch": 0.6552585612982693,
|
| 24779 |
+
"grad_norm": 35.0,
|
| 24780 |
+
"learning_rate": 9.897615879484236e-06,
|
| 24781 |
+
"loss": 20.0012,
|
| 24782 |
+
"step": 35310
|
| 24783 |
+
},
|
| 24784 |
+
{
|
| 24785 |
+
"epoch": 0.6554441343827492,
|
| 24786 |
+
"grad_norm": 33.4375,
|
| 24787 |
+
"learning_rate": 9.897586883698193e-06,
|
| 24788 |
+
"loss": 19.374,
|
| 24789 |
+
"step": 35320
|
| 24790 |
+
},
|
| 24791 |
+
{
|
| 24792 |
+
"epoch": 0.6556297074672289,
|
| 24793 |
+
"grad_norm": 36.15625,
|
| 24794 |
+
"learning_rate": 9.89755788791215e-06,
|
| 24795 |
+
"loss": 19.6139,
|
| 24796 |
+
"step": 35330
|
| 24797 |
+
},
|
| 24798 |
+
{
|
| 24799 |
+
"epoch": 0.6558152805517088,
|
| 24800 |
+
"grad_norm": 34.71875,
|
| 24801 |
+
"learning_rate": 9.897528892126108e-06,
|
| 24802 |
+
"loss": 20.0223,
|
| 24803 |
+
"step": 35340
|
| 24804 |
+
},
|
| 24805 |
+
{
|
| 24806 |
+
"epoch": 0.6560008536361887,
|
| 24807 |
+
"grad_norm": 36.3125,
|
| 24808 |
+
"learning_rate": 9.897499896340065e-06,
|
| 24809 |
+
"loss": 19.6657,
|
| 24810 |
+
"step": 35350
|
| 24811 |
+
},
|
| 24812 |
+
{
|
| 24813 |
+
"epoch": 0.6561864267206684,
|
| 24814 |
+
"grad_norm": 35.90625,
|
| 24815 |
+
"learning_rate": 9.897470900554024e-06,
|
| 24816 |
+
"loss": 19.4794,
|
| 24817 |
+
"step": 35360
|
| 24818 |
+
},
|
| 24819 |
+
{
|
| 24820 |
+
"epoch": 0.6563719998051483,
|
| 24821 |
+
"grad_norm": 35.1875,
|
| 24822 |
+
"learning_rate": 9.89744190476798e-06,
|
| 24823 |
+
"loss": 19.885,
|
| 24824 |
+
"step": 35370
|
| 24825 |
+
},
|
| 24826 |
+
{
|
| 24827 |
+
"epoch": 0.6565575728896281,
|
| 24828 |
+
"grad_norm": 34.875,
|
| 24829 |
+
"learning_rate": 9.897412908981937e-06,
|
| 24830 |
+
"loss": 19.5217,
|
| 24831 |
+
"step": 35380
|
| 24832 |
+
},
|
| 24833 |
+
{
|
| 24834 |
+
"epoch": 0.6567431459741079,
|
| 24835 |
+
"grad_norm": 38.28125,
|
| 24836 |
+
"learning_rate": 9.897383913195897e-06,
|
| 24837 |
+
"loss": 19.5472,
|
| 24838 |
+
"step": 35390
|
| 24839 |
+
},
|
| 24840 |
+
{
|
| 24841 |
+
"epoch": 0.6569287190585877,
|
| 24842 |
+
"grad_norm": 35.5,
|
| 24843 |
+
"learning_rate": 9.897354917409854e-06,
|
| 24844 |
+
"loss": 20.072,
|
| 24845 |
+
"step": 35400
|
| 24846 |
+
},
|
| 24847 |
+
{
|
| 24848 |
+
"epoch": 0.6571142921430676,
|
| 24849 |
+
"grad_norm": 36.125,
|
| 24850 |
+
"learning_rate": 9.897325921623811e-06,
|
| 24851 |
+
"loss": 19.4725,
|
| 24852 |
+
"step": 35410
|
| 24853 |
+
},
|
| 24854 |
+
{
|
| 24855 |
+
"epoch": 0.6572998652275474,
|
| 24856 |
+
"grad_norm": 34.21875,
|
| 24857 |
+
"learning_rate": 9.897296925837769e-06,
|
| 24858 |
+
"loss": 19.6822,
|
| 24859 |
+
"step": 35420
|
| 24860 |
+
},
|
| 24861 |
+
{
|
| 24862 |
+
"epoch": 0.6574854383120272,
|
| 24863 |
+
"grad_norm": 34.96875,
|
| 24864 |
+
"learning_rate": 9.897267930051726e-06,
|
| 24865 |
+
"loss": 19.8072,
|
| 24866 |
+
"step": 35430
|
| 24867 |
+
},
|
| 24868 |
+
{
|
| 24869 |
+
"epoch": 0.6576710113965071,
|
| 24870 |
+
"grad_norm": 35.90625,
|
| 24871 |
+
"learning_rate": 9.897238934265684e-06,
|
| 24872 |
+
"loss": 19.9793,
|
| 24873 |
+
"step": 35440
|
| 24874 |
+
},
|
| 24875 |
+
{
|
| 24876 |
+
"epoch": 0.6578565844809868,
|
| 24877 |
+
"grad_norm": 35.625,
|
| 24878 |
+
"learning_rate": 9.897209938479641e-06,
|
| 24879 |
+
"loss": 19.8958,
|
| 24880 |
+
"step": 35450
|
| 24881 |
+
},
|
| 24882 |
+
{
|
| 24883 |
+
"epoch": 0.6580421575654667,
|
| 24884 |
+
"grad_norm": 36.75,
|
| 24885 |
+
"learning_rate": 9.8971809426936e-06,
|
| 24886 |
+
"loss": 19.4194,
|
| 24887 |
+
"step": 35460
|
| 24888 |
+
},
|
| 24889 |
+
{
|
| 24890 |
+
"epoch": 0.6582277306499466,
|
| 24891 |
+
"grad_norm": 37.4375,
|
| 24892 |
+
"learning_rate": 9.897151946907558e-06,
|
| 24893 |
+
"loss": 20.0411,
|
| 24894 |
+
"step": 35470
|
| 24895 |
+
},
|
| 24896 |
+
{
|
| 24897 |
+
"epoch": 0.6584133037344264,
|
| 24898 |
+
"grad_norm": 35.5625,
|
| 24899 |
+
"learning_rate": 9.897122951121513e-06,
|
| 24900 |
+
"loss": 19.4788,
|
| 24901 |
+
"step": 35480
|
| 24902 |
+
},
|
| 24903 |
+
{
|
| 24904 |
+
"epoch": 0.6585988768189062,
|
| 24905 |
+
"grad_norm": 35.75,
|
| 24906 |
+
"learning_rate": 9.897093955335472e-06,
|
| 24907 |
+
"loss": 19.7224,
|
| 24908 |
+
"step": 35490
|
| 24909 |
+
},
|
| 24910 |
+
{
|
| 24911 |
+
"epoch": 0.658784449903386,
|
| 24912 |
+
"grad_norm": 33.25,
|
| 24913 |
+
"learning_rate": 9.89706495954943e-06,
|
| 24914 |
+
"loss": 19.5881,
|
| 24915 |
+
"step": 35500
|
| 24916 |
+
},
|
| 24917 |
+
{
|
| 24918 |
+
"epoch": 0.6589700229878659,
|
| 24919 |
+
"grad_norm": 35.1875,
|
| 24920 |
+
"learning_rate": 9.897035963763387e-06,
|
| 24921 |
+
"loss": 19.8756,
|
| 24922 |
+
"step": 35510
|
| 24923 |
+
},
|
| 24924 |
+
{
|
| 24925 |
+
"epoch": 0.6591555960723456,
|
| 24926 |
+
"grad_norm": 35.40625,
|
| 24927 |
+
"learning_rate": 9.897006967977345e-06,
|
| 24928 |
+
"loss": 19.8553,
|
| 24929 |
+
"step": 35520
|
| 24930 |
+
},
|
| 24931 |
+
{
|
| 24932 |
+
"epoch": 0.6593411691568255,
|
| 24933 |
+
"grad_norm": 35.9375,
|
| 24934 |
+
"learning_rate": 9.896977972191302e-06,
|
| 24935 |
+
"loss": 19.6849,
|
| 24936 |
+
"step": 35530
|
| 24937 |
+
},
|
| 24938 |
+
{
|
| 24939 |
+
"epoch": 0.6595267422413054,
|
| 24940 |
+
"grad_norm": 36.0,
|
| 24941 |
+
"learning_rate": 9.89694897640526e-06,
|
| 24942 |
+
"loss": 19.5625,
|
| 24943 |
+
"step": 35540
|
| 24944 |
+
},
|
| 24945 |
+
{
|
| 24946 |
+
"epoch": 0.6597123153257851,
|
| 24947 |
+
"grad_norm": 34.21875,
|
| 24948 |
+
"learning_rate": 9.896919980619217e-06,
|
| 24949 |
+
"loss": 19.38,
|
| 24950 |
+
"step": 35550
|
| 24951 |
+
},
|
| 24952 |
+
{
|
| 24953 |
+
"epoch": 0.659897888410265,
|
| 24954 |
+
"grad_norm": 35.125,
|
| 24955 |
+
"learning_rate": 9.896890984833176e-06,
|
| 24956 |
+
"loss": 20.0188,
|
| 24957 |
+
"step": 35560
|
| 24958 |
+
},
|
| 24959 |
+
{
|
| 24960 |
+
"epoch": 0.6600834614947448,
|
| 24961 |
+
"grad_norm": 36.90625,
|
| 24962 |
+
"learning_rate": 9.896861989047133e-06,
|
| 24963 |
+
"loss": 19.509,
|
| 24964 |
+
"step": 35570
|
| 24965 |
+
},
|
| 24966 |
+
{
|
| 24967 |
+
"epoch": 0.6602690345792246,
|
| 24968 |
+
"grad_norm": 36.0625,
|
| 24969 |
+
"learning_rate": 9.89683299326109e-06,
|
| 24970 |
+
"loss": 19.6172,
|
| 24971 |
+
"step": 35580
|
| 24972 |
+
},
|
| 24973 |
+
{
|
| 24974 |
+
"epoch": 0.6604546076637045,
|
| 24975 |
+
"grad_norm": 36.75,
|
| 24976 |
+
"learning_rate": 9.896803997475048e-06,
|
| 24977 |
+
"loss": 20.0115,
|
| 24978 |
+
"step": 35590
|
| 24979 |
+
},
|
| 24980 |
+
{
|
| 24981 |
+
"epoch": 0.6606401807481843,
|
| 24982 |
+
"grad_norm": 35.9375,
|
| 24983 |
+
"learning_rate": 9.896775001689006e-06,
|
| 24984 |
+
"loss": 19.4408,
|
| 24985 |
+
"step": 35600
|
| 24986 |
+
},
|
| 24987 |
+
{
|
| 24988 |
+
"epoch": 0.6608257538326641,
|
| 24989 |
+
"grad_norm": 36.8125,
|
| 24990 |
+
"learning_rate": 9.896746005902963e-06,
|
| 24991 |
+
"loss": 19.8966,
|
| 24992 |
+
"step": 35610
|
| 24993 |
+
},
|
| 24994 |
+
{
|
| 24995 |
+
"epoch": 0.6610113269171439,
|
| 24996 |
+
"grad_norm": 35.4375,
|
| 24997 |
+
"learning_rate": 9.89671701011692e-06,
|
| 24998 |
+
"loss": 19.3682,
|
| 24999 |
+
"step": 35620
|
| 25000 |
+
},
|
| 25001 |
+
{
|
| 25002 |
+
"epoch": 0.6611969000016238,
|
| 25003 |
+
"grad_norm": 36.6875,
|
| 25004 |
+
"learning_rate": 9.896688014330878e-06,
|
| 25005 |
+
"loss": 19.6727,
|
| 25006 |
+
"step": 35630
|
| 25007 |
+
},
|
| 25008 |
+
{
|
| 25009 |
+
"epoch": 0.6613824730861035,
|
| 25010 |
+
"grad_norm": 36.0625,
|
| 25011 |
+
"learning_rate": 9.896659018544835e-06,
|
| 25012 |
+
"loss": 19.741,
|
| 25013 |
+
"step": 35640
|
| 25014 |
+
},
|
| 25015 |
+
{
|
| 25016 |
+
"epoch": 0.6615680461705834,
|
| 25017 |
+
"grad_norm": 36.15625,
|
| 25018 |
+
"learning_rate": 9.896630022758793e-06,
|
| 25019 |
+
"loss": 20.0741,
|
| 25020 |
+
"step": 35650
|
| 25021 |
+
},
|
| 25022 |
+
{
|
| 25023 |
+
"epoch": 0.6617536192550633,
|
| 25024 |
+
"grad_norm": 35.09375,
|
| 25025 |
+
"learning_rate": 9.896601026972752e-06,
|
| 25026 |
+
"loss": 19.2563,
|
| 25027 |
+
"step": 35660
|
| 25028 |
+
},
|
| 25029 |
+
{
|
| 25030 |
+
"epoch": 0.6619391923395431,
|
| 25031 |
+
"grad_norm": 36.25,
|
| 25032 |
+
"learning_rate": 9.896572031186709e-06,
|
| 25033 |
+
"loss": 19.8901,
|
| 25034 |
+
"step": 35670
|
| 25035 |
+
},
|
| 25036 |
+
{
|
| 25037 |
+
"epoch": 0.6621247654240229,
|
| 25038 |
+
"grad_norm": 34.71875,
|
| 25039 |
+
"learning_rate": 9.896543035400666e-06,
|
| 25040 |
+
"loss": 19.9751,
|
| 25041 |
+
"step": 35680
|
| 25042 |
+
},
|
| 25043 |
+
{
|
| 25044 |
+
"epoch": 0.6623103385085027,
|
| 25045 |
+
"grad_norm": 33.90625,
|
| 25046 |
+
"learning_rate": 9.896514039614624e-06,
|
| 25047 |
+
"loss": 19.0984,
|
| 25048 |
+
"step": 35690
|
| 25049 |
+
},
|
| 25050 |
+
{
|
| 25051 |
+
"epoch": 0.6624959115929826,
|
| 25052 |
+
"grad_norm": 35.09375,
|
| 25053 |
+
"learning_rate": 9.896485043828581e-06,
|
| 25054 |
+
"loss": 19.549,
|
| 25055 |
+
"step": 35700
|
| 25056 |
+
},
|
| 25057 |
+
{
|
| 25058 |
+
"epoch": 0.6626814846774624,
|
| 25059 |
+
"grad_norm": 34.40625,
|
| 25060 |
+
"learning_rate": 9.896456048042539e-06,
|
| 25061 |
+
"loss": 19.4845,
|
| 25062 |
+
"step": 35710
|
| 25063 |
+
},
|
| 25064 |
+
{
|
| 25065 |
+
"epoch": 0.6628670577619422,
|
| 25066 |
+
"grad_norm": 35.8125,
|
| 25067 |
+
"learning_rate": 9.896427052256496e-06,
|
| 25068 |
+
"loss": 19.5595,
|
| 25069 |
+
"step": 35720
|
| 25070 |
+
},
|
| 25071 |
+
{
|
| 25072 |
+
"epoch": 0.6630526308464221,
|
| 25073 |
+
"grad_norm": 36.21875,
|
| 25074 |
+
"learning_rate": 9.896398056470453e-06,
|
| 25075 |
+
"loss": 20.1986,
|
| 25076 |
+
"step": 35730
|
| 25077 |
+
},
|
| 25078 |
+
{
|
| 25079 |
+
"epoch": 0.6632382039309018,
|
| 25080 |
+
"grad_norm": 35.09375,
|
| 25081 |
+
"learning_rate": 9.896369060684413e-06,
|
| 25082 |
+
"loss": 19.7129,
|
| 25083 |
+
"step": 35740
|
| 25084 |
+
},
|
| 25085 |
+
{
|
| 25086 |
+
"epoch": 0.6634237770153817,
|
| 25087 |
+
"grad_norm": 37.5,
|
| 25088 |
+
"learning_rate": 9.896340064898368e-06,
|
| 25089 |
+
"loss": 19.6799,
|
| 25090 |
+
"step": 35750
|
| 25091 |
+
},
|
| 25092 |
+
{
|
| 25093 |
+
"epoch": 0.6636093500998616,
|
| 25094 |
+
"grad_norm": 34.65625,
|
| 25095 |
+
"learning_rate": 9.896311069112326e-06,
|
| 25096 |
+
"loss": 19.6695,
|
| 25097 |
+
"step": 35760
|
| 25098 |
+
},
|
| 25099 |
+
{
|
| 25100 |
+
"epoch": 0.6637949231843413,
|
| 25101 |
+
"grad_norm": 36.28125,
|
| 25102 |
+
"learning_rate": 9.896282073326285e-06,
|
| 25103 |
+
"loss": 19.8379,
|
| 25104 |
+
"step": 35770
|
| 25105 |
+
},
|
| 25106 |
+
{
|
| 25107 |
+
"epoch": 0.6639804962688212,
|
| 25108 |
+
"grad_norm": 36.0625,
|
| 25109 |
+
"learning_rate": 9.896253077540242e-06,
|
| 25110 |
+
"loss": 19.4283,
|
| 25111 |
+
"step": 35780
|
| 25112 |
+
},
|
| 25113 |
+
{
|
| 25114 |
+
"epoch": 0.664166069353301,
|
| 25115 |
+
"grad_norm": 36.75,
|
| 25116 |
+
"learning_rate": 9.8962240817542e-06,
|
| 25117 |
+
"loss": 19.5346,
|
| 25118 |
+
"step": 35790
|
| 25119 |
+
},
|
| 25120 |
+
{
|
| 25121 |
+
"epoch": 0.6643516424377808,
|
| 25122 |
+
"grad_norm": 36.625,
|
| 25123 |
+
"learning_rate": 9.896195085968157e-06,
|
| 25124 |
+
"loss": 19.9317,
|
| 25125 |
+
"step": 35800
|
| 25126 |
+
},
|
| 25127 |
+
{
|
| 25128 |
+
"epoch": 0.6645372155222606,
|
| 25129 |
+
"grad_norm": 36.65625,
|
| 25130 |
+
"learning_rate": 9.896166090182114e-06,
|
| 25131 |
+
"loss": 20.057,
|
| 25132 |
+
"step": 35810
|
| 25133 |
+
},
|
| 25134 |
+
{
|
| 25135 |
+
"epoch": 0.6647227886067405,
|
| 25136 |
+
"grad_norm": 37.375,
|
| 25137 |
+
"learning_rate": 9.896137094396072e-06,
|
| 25138 |
+
"loss": 19.5665,
|
| 25139 |
+
"step": 35820
|
| 25140 |
+
},
|
| 25141 |
+
{
|
| 25142 |
+
"epoch": 0.6649083616912203,
|
| 25143 |
+
"grad_norm": 34.875,
|
| 25144 |
+
"learning_rate": 9.89610809861003e-06,
|
| 25145 |
+
"loss": 19.5112,
|
| 25146 |
+
"step": 35830
|
| 25147 |
+
},
|
| 25148 |
+
{
|
| 25149 |
+
"epoch": 0.6650939347757001,
|
| 25150 |
+
"grad_norm": 35.03125,
|
| 25151 |
+
"learning_rate": 9.896079102823988e-06,
|
| 25152 |
+
"loss": 19.7005,
|
| 25153 |
+
"step": 35840
|
| 25154 |
+
},
|
| 25155 |
+
{
|
| 25156 |
+
"epoch": 0.66527950786018,
|
| 25157 |
+
"grad_norm": 36.46875,
|
| 25158 |
+
"learning_rate": 9.896050107037944e-06,
|
| 25159 |
+
"loss": 19.6946,
|
| 25160 |
+
"step": 35850
|
| 25161 |
+
},
|
| 25162 |
+
{
|
| 25163 |
+
"epoch": 0.6654650809446598,
|
| 25164 |
+
"grad_norm": 35.96875,
|
| 25165 |
+
"learning_rate": 9.896021111251901e-06,
|
| 25166 |
+
"loss": 19.9033,
|
| 25167 |
+
"step": 35860
|
| 25168 |
+
},
|
| 25169 |
+
{
|
| 25170 |
+
"epoch": 0.6656506540291396,
|
| 25171 |
+
"grad_norm": 35.15625,
|
| 25172 |
+
"learning_rate": 9.89599211546586e-06,
|
| 25173 |
+
"loss": 19.7513,
|
| 25174 |
+
"step": 35870
|
| 25175 |
+
},
|
| 25176 |
+
{
|
| 25177 |
+
"epoch": 0.6658362271136194,
|
| 25178 |
+
"grad_norm": 34.28125,
|
| 25179 |
+
"learning_rate": 9.895963119679818e-06,
|
| 25180 |
+
"loss": 19.5514,
|
| 25181 |
+
"step": 35880
|
| 25182 |
+
},
|
| 25183 |
+
{
|
| 25184 |
+
"epoch": 0.6660218001980993,
|
| 25185 |
+
"grad_norm": 34.75,
|
| 25186 |
+
"learning_rate": 9.895934123893775e-06,
|
| 25187 |
+
"loss": 19.5261,
|
| 25188 |
+
"step": 35890
|
| 25189 |
+
},
|
| 25190 |
+
{
|
| 25191 |
+
"epoch": 0.6662073732825791,
|
| 25192 |
+
"grad_norm": 34.25,
|
| 25193 |
+
"learning_rate": 9.895905128107733e-06,
|
| 25194 |
+
"loss": 20.0393,
|
| 25195 |
+
"step": 35900
|
| 25196 |
+
},
|
| 25197 |
+
{
|
| 25198 |
+
"epoch": 0.6663929463670589,
|
| 25199 |
+
"grad_norm": 34.40625,
|
| 25200 |
+
"learning_rate": 9.89587613232169e-06,
|
| 25201 |
+
"loss": 19.6479,
|
| 25202 |
+
"step": 35910
|
| 25203 |
+
},
|
| 25204 |
+
{
|
| 25205 |
+
"epoch": 0.6665785194515388,
|
| 25206 |
+
"grad_norm": 37.53125,
|
| 25207 |
+
"learning_rate": 9.895847136535648e-06,
|
| 25208 |
+
"loss": 19.9184,
|
| 25209 |
+
"step": 35920
|
| 25210 |
+
},
|
| 25211 |
+
{
|
| 25212 |
+
"epoch": 0.6667640925360185,
|
| 25213 |
+
"grad_norm": 36.28125,
|
| 25214 |
+
"learning_rate": 9.895818140749605e-06,
|
| 25215 |
+
"loss": 19.9199,
|
| 25216 |
+
"step": 35930
|
| 25217 |
+
},
|
| 25218 |
+
{
|
| 25219 |
+
"epoch": 0.6669496656204984,
|
| 25220 |
+
"grad_norm": 38.84375,
|
| 25221 |
+
"learning_rate": 9.895789144963564e-06,
|
| 25222 |
+
"loss": 20.2039,
|
| 25223 |
+
"step": 35940
|
| 25224 |
+
},
|
| 25225 |
+
{
|
| 25226 |
+
"epoch": 0.6671352387049783,
|
| 25227 |
+
"grad_norm": 33.3125,
|
| 25228 |
+
"learning_rate": 9.895760149177522e-06,
|
| 25229 |
+
"loss": 19.3214,
|
| 25230 |
+
"step": 35950
|
| 25231 |
+
},
|
| 25232 |
+
{
|
| 25233 |
+
"epoch": 0.667320811789458,
|
| 25234 |
+
"grad_norm": 36.34375,
|
| 25235 |
+
"learning_rate": 9.895731153391477e-06,
|
| 25236 |
+
"loss": 19.4983,
|
| 25237 |
+
"step": 35960
|
| 25238 |
+
},
|
| 25239 |
+
{
|
| 25240 |
+
"epoch": 0.6675063848739379,
|
| 25241 |
+
"grad_norm": 32.9375,
|
| 25242 |
+
"learning_rate": 9.895702157605436e-06,
|
| 25243 |
+
"loss": 19.6202,
|
| 25244 |
+
"step": 35970
|
| 25245 |
+
},
|
| 25246 |
+
{
|
| 25247 |
+
"epoch": 0.6676919579584177,
|
| 25248 |
+
"grad_norm": 37.0625,
|
| 25249 |
+
"learning_rate": 9.895673161819394e-06,
|
| 25250 |
+
"loss": 19.7313,
|
| 25251 |
+
"step": 35980
|
| 25252 |
+
},
|
| 25253 |
+
{
|
| 25254 |
+
"epoch": 0.6678775310428975,
|
| 25255 |
+
"grad_norm": 36.90625,
|
| 25256 |
+
"learning_rate": 9.895644166033351e-06,
|
| 25257 |
+
"loss": 19.6996,
|
| 25258 |
+
"step": 35990
|
| 25259 |
+
},
|
| 25260 |
+
{
|
| 25261 |
+
"epoch": 0.6680631041273773,
|
| 25262 |
+
"grad_norm": 34.625,
|
| 25263 |
+
"learning_rate": 9.895615170247309e-06,
|
| 25264 |
+
"loss": 19.2428,
|
| 25265 |
+
"step": 36000
|
| 25266 |
+
},
|
| 25267 |
+
{
|
| 25268 |
+
"epoch": 0.6682486772118572,
|
| 25269 |
+
"grad_norm": 35.0,
|
| 25270 |
+
"learning_rate": 9.895586174461268e-06,
|
| 25271 |
+
"loss": 20.2196,
|
| 25272 |
+
"step": 36010
|
| 25273 |
+
},
|
| 25274 |
+
{
|
| 25275 |
+
"epoch": 0.6684342502963371,
|
| 25276 |
+
"grad_norm": 35.0625,
|
| 25277 |
+
"learning_rate": 9.895557178675223e-06,
|
| 25278 |
+
"loss": 19.687,
|
| 25279 |
+
"step": 36020
|
| 25280 |
+
},
|
| 25281 |
+
{
|
| 25282 |
+
"epoch": 0.6686198233808168,
|
| 25283 |
+
"grad_norm": 35.53125,
|
| 25284 |
+
"learning_rate": 9.89552818288918e-06,
|
| 25285 |
+
"loss": 19.5315,
|
| 25286 |
+
"step": 36030
|
| 25287 |
+
},
|
| 25288 |
+
{
|
| 25289 |
+
"epoch": 0.6688053964652967,
|
| 25290 |
+
"grad_norm": 34.5625,
|
| 25291 |
+
"learning_rate": 9.89549918710314e-06,
|
| 25292 |
+
"loss": 19.8379,
|
| 25293 |
+
"step": 36040
|
| 25294 |
+
},
|
| 25295 |
+
{
|
| 25296 |
+
"epoch": 0.6689909695497765,
|
| 25297 |
+
"grad_norm": 33.53125,
|
| 25298 |
+
"learning_rate": 9.895470191317097e-06,
|
| 25299 |
+
"loss": 19.9126,
|
| 25300 |
+
"step": 36050
|
| 25301 |
+
},
|
| 25302 |
+
{
|
| 25303 |
+
"epoch": 0.6691765426342563,
|
| 25304 |
+
"grad_norm": 36.5,
|
| 25305 |
+
"learning_rate": 9.895441195531055e-06,
|
| 25306 |
+
"loss": 19.7775,
|
| 25307 |
+
"step": 36060
|
| 25308 |
+
},
|
| 25309 |
+
{
|
| 25310 |
+
"epoch": 0.6693621157187362,
|
| 25311 |
+
"grad_norm": 35.375,
|
| 25312 |
+
"learning_rate": 9.895412199745012e-06,
|
| 25313 |
+
"loss": 19.745,
|
| 25314 |
+
"step": 36070
|
| 25315 |
+
},
|
| 25316 |
+
{
|
| 25317 |
+
"epoch": 0.669547688803216,
|
| 25318 |
+
"grad_norm": 35.375,
|
| 25319 |
+
"learning_rate": 9.89538320395897e-06,
|
| 25320 |
+
"loss": 19.7045,
|
| 25321 |
+
"step": 36080
|
| 25322 |
+
},
|
| 25323 |
+
{
|
| 25324 |
+
"epoch": 0.6697332618876958,
|
| 25325 |
+
"grad_norm": 34.6875,
|
| 25326 |
+
"learning_rate": 9.895354208172927e-06,
|
| 25327 |
+
"loss": 18.9317,
|
| 25328 |
+
"step": 36090
|
| 25329 |
+
},
|
| 25330 |
+
{
|
| 25331 |
+
"epoch": 0.6699188349721756,
|
| 25332 |
+
"grad_norm": 35.59375,
|
| 25333 |
+
"learning_rate": 9.895325212386884e-06,
|
| 25334 |
+
"loss": 19.8415,
|
| 25335 |
+
"step": 36100
|
| 25336 |
+
},
|
| 25337 |
+
{
|
| 25338 |
+
"epoch": 0.6701044080566555,
|
| 25339 |
+
"grad_norm": 34.71875,
|
| 25340 |
+
"learning_rate": 9.895296216600843e-06,
|
| 25341 |
+
"loss": 19.7913,
|
| 25342 |
+
"step": 36110
|
| 25343 |
+
},
|
| 25344 |
+
{
|
| 25345 |
+
"epoch": 0.6702899811411352,
|
| 25346 |
+
"grad_norm": 35.59375,
|
| 25347 |
+
"learning_rate": 9.895267220814799e-06,
|
| 25348 |
+
"loss": 19.5304,
|
| 25349 |
+
"step": 36120
|
| 25350 |
+
},
|
| 25351 |
+
{
|
| 25352 |
+
"epoch": 0.6704755542256151,
|
| 25353 |
+
"grad_norm": 34.96875,
|
| 25354 |
+
"learning_rate": 9.895238225028757e-06,
|
| 25355 |
+
"loss": 19.4498,
|
| 25356 |
+
"step": 36130
|
| 25357 |
+
},
|
| 25358 |
+
{
|
| 25359 |
+
"epoch": 0.670661127310095,
|
| 25360 |
+
"grad_norm": 35.25,
|
| 25361 |
+
"learning_rate": 9.895209229242716e-06,
|
| 25362 |
+
"loss": 19.6703,
|
| 25363 |
+
"step": 36140
|
| 25364 |
+
},
|
| 25365 |
+
{
|
| 25366 |
+
"epoch": 0.6708467003945747,
|
| 25367 |
+
"grad_norm": 34.21875,
|
| 25368 |
+
"learning_rate": 9.895180233456673e-06,
|
| 25369 |
+
"loss": 19.4621,
|
| 25370 |
+
"step": 36150
|
| 25371 |
+
},
|
| 25372 |
+
{
|
| 25373 |
+
"epoch": 0.6710322734790546,
|
| 25374 |
+
"grad_norm": 34.625,
|
| 25375 |
+
"learning_rate": 9.89515123767063e-06,
|
| 25376 |
+
"loss": 19.6551,
|
| 25377 |
+
"step": 36160
|
| 25378 |
+
},
|
| 25379 |
+
{
|
| 25380 |
+
"epoch": 0.6712178465635344,
|
| 25381 |
+
"grad_norm": 33.625,
|
| 25382 |
+
"learning_rate": 9.895122241884588e-06,
|
| 25383 |
+
"loss": 19.5649,
|
| 25384 |
+
"step": 36170
|
| 25385 |
+
},
|
| 25386 |
+
{
|
| 25387 |
+
"epoch": 0.6714034196480142,
|
| 25388 |
+
"grad_norm": 37.3125,
|
| 25389 |
+
"learning_rate": 9.895093246098545e-06,
|
| 25390 |
+
"loss": 19.501,
|
| 25391 |
+
"step": 36180
|
| 25392 |
+
},
|
| 25393 |
+
{
|
| 25394 |
+
"epoch": 0.6715889927324941,
|
| 25395 |
+
"grad_norm": 33.90625,
|
| 25396 |
+
"learning_rate": 9.895064250312503e-06,
|
| 25397 |
+
"loss": 19.7173,
|
| 25398 |
+
"step": 36190
|
| 25399 |
+
},
|
| 25400 |
+
{
|
| 25401 |
+
"epoch": 0.6717745658169739,
|
| 25402 |
+
"grad_norm": 37.9375,
|
| 25403 |
+
"learning_rate": 9.89503525452646e-06,
|
| 25404 |
+
"loss": 19.8307,
|
| 25405 |
+
"step": 36200
|
| 25406 |
+
},
|
| 25407 |
+
{
|
| 25408 |
+
"epoch": 0.6719601389014538,
|
| 25409 |
+
"grad_norm": 37.375,
|
| 25410 |
+
"learning_rate": 9.895006258740418e-06,
|
| 25411 |
+
"loss": 19.3379,
|
| 25412 |
+
"step": 36210
|
| 25413 |
+
},
|
| 25414 |
+
{
|
| 25415 |
+
"epoch": 0.6721457119859335,
|
| 25416 |
+
"grad_norm": 35.4375,
|
| 25417 |
+
"learning_rate": 9.894977262954377e-06,
|
| 25418 |
+
"loss": 19.5971,
|
| 25419 |
+
"step": 36220
|
| 25420 |
+
},
|
| 25421 |
+
{
|
| 25422 |
+
"epoch": 0.6723312850704134,
|
| 25423 |
+
"grad_norm": 35.78125,
|
| 25424 |
+
"learning_rate": 9.894948267168332e-06,
|
| 25425 |
+
"loss": 19.6289,
|
| 25426 |
+
"step": 36230
|
| 25427 |
+
},
|
| 25428 |
+
{
|
| 25429 |
+
"epoch": 0.6725168581548933,
|
| 25430 |
+
"grad_norm": 34.59375,
|
| 25431 |
+
"learning_rate": 9.894919271382291e-06,
|
| 25432 |
+
"loss": 19.5154,
|
| 25433 |
+
"step": 36240
|
| 25434 |
+
},
|
| 25435 |
+
{
|
| 25436 |
+
"epoch": 0.672702431239373,
|
| 25437 |
+
"grad_norm": 37.40625,
|
| 25438 |
+
"learning_rate": 9.894890275596249e-06,
|
| 25439 |
+
"loss": 19.7842,
|
| 25440 |
+
"step": 36250
|
| 25441 |
+
},
|
| 25442 |
+
{
|
| 25443 |
+
"epoch": 0.6728880043238529,
|
| 25444 |
+
"grad_norm": 35.71875,
|
| 25445 |
+
"learning_rate": 9.894861279810206e-06,
|
| 25446 |
+
"loss": 19.624,
|
| 25447 |
+
"step": 36260
|
| 25448 |
+
},
|
| 25449 |
+
{
|
| 25450 |
+
"epoch": 0.6730735774083327,
|
| 25451 |
+
"grad_norm": 36.0625,
|
| 25452 |
+
"learning_rate": 9.894832284024164e-06,
|
| 25453 |
+
"loss": 19.7039,
|
| 25454 |
+
"step": 36270
|
| 25455 |
+
},
|
| 25456 |
+
{
|
| 25457 |
+
"epoch": 0.6732591504928125,
|
| 25458 |
+
"grad_norm": 35.125,
|
| 25459 |
+
"learning_rate": 9.894803288238121e-06,
|
| 25460 |
+
"loss": 19.9208,
|
| 25461 |
+
"step": 36280
|
| 25462 |
+
},
|
| 25463 |
+
{
|
| 25464 |
+
"epoch": 0.6734447235772923,
|
| 25465 |
+
"grad_norm": 35.34375,
|
| 25466 |
+
"learning_rate": 9.894774292452078e-06,
|
| 25467 |
+
"loss": 19.3393,
|
| 25468 |
+
"step": 36290
|
| 25469 |
+
},
|
| 25470 |
+
{
|
| 25471 |
+
"epoch": 0.6736302966617722,
|
| 25472 |
+
"grad_norm": 35.53125,
|
| 25473 |
+
"learning_rate": 9.894745296666036e-06,
|
| 25474 |
+
"loss": 19.7506,
|
| 25475 |
+
"step": 36300
|
| 25476 |
+
},
|
| 25477 |
+
{
|
| 25478 |
+
"epoch": 0.673815869746252,
|
| 25479 |
+
"grad_norm": 37.28125,
|
| 25480 |
+
"learning_rate": 9.894716300879993e-06,
|
| 25481 |
+
"loss": 19.8507,
|
| 25482 |
+
"step": 36310
|
| 25483 |
+
},
|
| 25484 |
+
{
|
| 25485 |
+
"epoch": 0.6740014428307318,
|
| 25486 |
+
"grad_norm": 35.78125,
|
| 25487 |
+
"learning_rate": 9.894687305093952e-06,
|
| 25488 |
+
"loss": 19.6936,
|
| 25489 |
+
"step": 36320
|
| 25490 |
+
},
|
| 25491 |
+
{
|
| 25492 |
+
"epoch": 0.6741870159152117,
|
| 25493 |
+
"grad_norm": 37.65625,
|
| 25494 |
+
"learning_rate": 9.89465830930791e-06,
|
| 25495 |
+
"loss": 19.689,
|
| 25496 |
+
"step": 36330
|
| 25497 |
+
},
|
| 25498 |
+
{
|
| 25499 |
+
"epoch": 0.6743725889996914,
|
| 25500 |
+
"grad_norm": 37.0625,
|
| 25501 |
+
"learning_rate": 9.894629313521865e-06,
|
| 25502 |
+
"loss": 19.8617,
|
| 25503 |
+
"step": 36340
|
| 25504 |
+
},
|
| 25505 |
+
{
|
| 25506 |
+
"epoch": 0.6745581620841713,
|
| 25507 |
+
"grad_norm": 38.6875,
|
| 25508 |
+
"learning_rate": 9.894600317735825e-06,
|
| 25509 |
+
"loss": 19.7849,
|
| 25510 |
+
"step": 36350
|
| 25511 |
+
},
|
| 25512 |
+
{
|
| 25513 |
+
"epoch": 0.6747437351686512,
|
| 25514 |
+
"grad_norm": 36.71875,
|
| 25515 |
+
"learning_rate": 9.894571321949782e-06,
|
| 25516 |
+
"loss": 19.5811,
|
| 25517 |
+
"step": 36360
|
| 25518 |
+
},
|
| 25519 |
+
{
|
| 25520 |
+
"epoch": 0.6749293082531309,
|
| 25521 |
+
"grad_norm": 35.625,
|
| 25522 |
+
"learning_rate": 9.89454232616374e-06,
|
| 25523 |
+
"loss": 19.6559,
|
| 25524 |
+
"step": 36370
|
| 25525 |
+
},
|
| 25526 |
+
{
|
| 25527 |
+
"epoch": 0.6751148813376108,
|
| 25528 |
+
"grad_norm": 36.90625,
|
| 25529 |
+
"learning_rate": 9.894513330377697e-06,
|
| 25530 |
+
"loss": 19.5922,
|
| 25531 |
+
"step": 36380
|
| 25532 |
+
},
|
| 25533 |
+
{
|
| 25534 |
+
"epoch": 0.6753004544220906,
|
| 25535 |
+
"grad_norm": 37.0,
|
| 25536 |
+
"learning_rate": 9.894484334591654e-06,
|
| 25537 |
+
"loss": 19.6276,
|
| 25538 |
+
"step": 36390
|
| 25539 |
+
},
|
| 25540 |
+
{
|
| 25541 |
+
"epoch": 0.6754860275065705,
|
| 25542 |
+
"grad_norm": 35.84375,
|
| 25543 |
+
"learning_rate": 9.894455338805612e-06,
|
| 25544 |
+
"loss": 19.9161,
|
| 25545 |
+
"step": 36400
|
| 25546 |
+
},
|
| 25547 |
+
{
|
| 25548 |
+
"epoch": 0.6756716005910502,
|
| 25549 |
+
"grad_norm": 36.6875,
|
| 25550 |
+
"learning_rate": 9.894426343019569e-06,
|
| 25551 |
+
"loss": 19.9491,
|
| 25552 |
+
"step": 36410
|
| 25553 |
+
},
|
| 25554 |
+
{
|
| 25555 |
+
"epoch": 0.6758571736755301,
|
| 25556 |
+
"grad_norm": 35.03125,
|
| 25557 |
+
"learning_rate": 9.894397347233528e-06,
|
| 25558 |
+
"loss": 19.4294,
|
| 25559 |
+
"step": 36420
|
| 25560 |
+
},
|
| 25561 |
+
{
|
| 25562 |
+
"epoch": 0.67604274676001,
|
| 25563 |
+
"grad_norm": 35.25,
|
| 25564 |
+
"learning_rate": 9.894368351447486e-06,
|
| 25565 |
+
"loss": 19.8872,
|
| 25566 |
+
"step": 36430
|
| 25567 |
+
},
|
| 25568 |
+
{
|
| 25569 |
+
"epoch": 0.6762283198444897,
|
| 25570 |
+
"grad_norm": 35.84375,
|
| 25571 |
+
"learning_rate": 9.894339355661441e-06,
|
| 25572 |
+
"loss": 19.9173,
|
| 25573 |
+
"step": 36440
|
| 25574 |
+
},
|
| 25575 |
+
{
|
| 25576 |
+
"epoch": 0.6764138929289696,
|
| 25577 |
+
"grad_norm": 36.59375,
|
| 25578 |
+
"learning_rate": 9.8943103598754e-06,
|
| 25579 |
+
"loss": 19.9363,
|
| 25580 |
+
"step": 36450
|
| 25581 |
+
},
|
| 25582 |
+
{
|
| 25583 |
+
"epoch": 0.6765994660134494,
|
| 25584 |
+
"grad_norm": 36.03125,
|
| 25585 |
+
"learning_rate": 9.894281364089358e-06,
|
| 25586 |
+
"loss": 20.222,
|
| 25587 |
+
"step": 36460
|
| 25588 |
+
},
|
| 25589 |
+
{
|
| 25590 |
+
"epoch": 0.6767850390979292,
|
| 25591 |
+
"grad_norm": 36.09375,
|
| 25592 |
+
"learning_rate": 9.894252368303315e-06,
|
| 25593 |
+
"loss": 19.7718,
|
| 25594 |
+
"step": 36470
|
| 25595 |
+
},
|
| 25596 |
+
{
|
| 25597 |
+
"epoch": 0.6769706121824091,
|
| 25598 |
+
"grad_norm": 36.5,
|
| 25599 |
+
"learning_rate": 9.894223372517273e-06,
|
| 25600 |
+
"loss": 19.5082,
|
| 25601 |
+
"step": 36480
|
| 25602 |
+
},
|
| 25603 |
+
{
|
| 25604 |
+
"epoch": 0.6771561852668889,
|
| 25605 |
+
"grad_norm": 33.59375,
|
| 25606 |
+
"learning_rate": 9.894194376731232e-06,
|
| 25607 |
+
"loss": 19.2585,
|
| 25608 |
+
"step": 36490
|
| 25609 |
+
},
|
| 25610 |
+
{
|
| 25611 |
+
"epoch": 0.6773417583513687,
|
| 25612 |
+
"grad_norm": 34.875,
|
| 25613 |
+
"learning_rate": 9.894165380945187e-06,
|
| 25614 |
+
"loss": 19.7396,
|
| 25615 |
+
"step": 36500
|
| 25616 |
+
},
|
| 25617 |
+
{
|
| 25618 |
+
"epoch": 0.6775273314358485,
|
| 25619 |
+
"grad_norm": 36.6875,
|
| 25620 |
+
"learning_rate": 9.894136385159145e-06,
|
| 25621 |
+
"loss": 20.1396,
|
| 25622 |
+
"step": 36510
|
| 25623 |
+
},
|
| 25624 |
+
{
|
| 25625 |
+
"epoch": 0.6777129045203284,
|
| 25626 |
+
"grad_norm": 34.9375,
|
| 25627 |
+
"learning_rate": 9.894107389373104e-06,
|
| 25628 |
+
"loss": 19.3631,
|
| 25629 |
+
"step": 36520
|
| 25630 |
+
},
|
| 25631 |
+
{
|
| 25632 |
+
"epoch": 0.6778984776048081,
|
| 25633 |
+
"grad_norm": 36.0625,
|
| 25634 |
+
"learning_rate": 9.894078393587061e-06,
|
| 25635 |
+
"loss": 19.5291,
|
| 25636 |
+
"step": 36530
|
| 25637 |
+
},
|
| 25638 |
+
{
|
| 25639 |
+
"epoch": 0.678084050689288,
|
| 25640 |
+
"grad_norm": 35.625,
|
| 25641 |
+
"learning_rate": 9.894049397801019e-06,
|
| 25642 |
+
"loss": 19.3359,
|
| 25643 |
+
"step": 36540
|
| 25644 |
+
},
|
| 25645 |
+
{
|
| 25646 |
+
"epoch": 0.6782696237737679,
|
| 25647 |
+
"grad_norm": 34.03125,
|
| 25648 |
+
"learning_rate": 9.894020402014976e-06,
|
| 25649 |
+
"loss": 19.4528,
|
| 25650 |
+
"step": 36550
|
| 25651 |
+
},
|
| 25652 |
+
{
|
| 25653 |
+
"epoch": 0.6784551968582477,
|
| 25654 |
+
"grad_norm": 35.0,
|
| 25655 |
+
"learning_rate": 9.893991406228934e-06,
|
| 25656 |
+
"loss": 19.4136,
|
| 25657 |
+
"step": 36560
|
| 25658 |
+
},
|
| 25659 |
+
{
|
| 25660 |
+
"epoch": 0.6786407699427275,
|
| 25661 |
+
"grad_norm": 35.125,
|
| 25662 |
+
"learning_rate": 9.893962410442891e-06,
|
| 25663 |
+
"loss": 19.4879,
|
| 25664 |
+
"step": 36570
|
| 25665 |
+
},
|
| 25666 |
+
{
|
| 25667 |
+
"epoch": 0.6788263430272073,
|
| 25668 |
+
"grad_norm": 35.4375,
|
| 25669 |
+
"learning_rate": 9.893933414656848e-06,
|
| 25670 |
+
"loss": 19.7995,
|
| 25671 |
+
"step": 36580
|
| 25672 |
+
},
|
| 25673 |
+
{
|
| 25674 |
+
"epoch": 0.6790119161116872,
|
| 25675 |
+
"grad_norm": 33.875,
|
| 25676 |
+
"learning_rate": 9.893904418870807e-06,
|
| 25677 |
+
"loss": 19.754,
|
| 25678 |
+
"step": 36590
|
| 25679 |
+
},
|
| 25680 |
+
{
|
| 25681 |
+
"epoch": 0.679197489196167,
|
| 25682 |
+
"grad_norm": 37.03125,
|
| 25683 |
+
"learning_rate": 9.893875423084765e-06,
|
| 25684 |
+
"loss": 20.028,
|
| 25685 |
+
"step": 36600
|
| 25686 |
+
},
|
| 25687 |
+
{
|
| 25688 |
+
"epoch": 0.6793830622806468,
|
| 25689 |
+
"grad_norm": 35.46875,
|
| 25690 |
+
"learning_rate": 9.89384642729872e-06,
|
| 25691 |
+
"loss": 19.4872,
|
| 25692 |
+
"step": 36610
|
| 25693 |
+
},
|
| 25694 |
+
{
|
| 25695 |
+
"epoch": 0.6795686353651267,
|
| 25696 |
+
"grad_norm": 36.8125,
|
| 25697 |
+
"learning_rate": 9.89381743151268e-06,
|
| 25698 |
+
"loss": 19.7629,
|
| 25699 |
+
"step": 36620
|
| 25700 |
+
},
|
| 25701 |
+
{
|
| 25702 |
+
"epoch": 0.6797542084496064,
|
| 25703 |
+
"grad_norm": 32.21875,
|
| 25704 |
+
"learning_rate": 9.893788435726637e-06,
|
| 25705 |
+
"loss": 19.2777,
|
| 25706 |
+
"step": 36630
|
| 25707 |
+
},
|
| 25708 |
+
{
|
| 25709 |
+
"epoch": 0.6799397815340863,
|
| 25710 |
+
"grad_norm": 35.375,
|
| 25711 |
+
"learning_rate": 9.893759439940594e-06,
|
| 25712 |
+
"loss": 19.8635,
|
| 25713 |
+
"step": 36640
|
| 25714 |
+
},
|
| 25715 |
+
{
|
| 25716 |
+
"epoch": 0.6801253546185662,
|
| 25717 |
+
"grad_norm": 37.40625,
|
| 25718 |
+
"learning_rate": 9.893730444154552e-06,
|
| 25719 |
+
"loss": 19.7788,
|
| 25720 |
+
"step": 36650
|
| 25721 |
+
},
|
| 25722 |
+
{
|
| 25723 |
+
"epoch": 0.6803109277030459,
|
| 25724 |
+
"grad_norm": 35.375,
|
| 25725 |
+
"learning_rate": 9.89370144836851e-06,
|
| 25726 |
+
"loss": 19.6674,
|
| 25727 |
+
"step": 36660
|
| 25728 |
+
},
|
| 25729 |
+
{
|
| 25730 |
+
"epoch": 0.6804965007875258,
|
| 25731 |
+
"grad_norm": 35.6875,
|
| 25732 |
+
"learning_rate": 9.893672452582467e-06,
|
| 25733 |
+
"loss": 19.4339,
|
| 25734 |
+
"step": 36670
|
| 25735 |
+
},
|
| 25736 |
+
{
|
| 25737 |
+
"epoch": 0.6806820738720056,
|
| 25738 |
+
"grad_norm": 34.09375,
|
| 25739 |
+
"learning_rate": 9.893643456796424e-06,
|
| 25740 |
+
"loss": 19.9585,
|
| 25741 |
+
"step": 36680
|
| 25742 |
+
},
|
| 25743 |
+
{
|
| 25744 |
+
"epoch": 0.6808676469564854,
|
| 25745 |
+
"grad_norm": 37.40625,
|
| 25746 |
+
"learning_rate": 9.893614461010382e-06,
|
| 25747 |
+
"loss": 19.705,
|
| 25748 |
+
"step": 36690
|
| 25749 |
+
},
|
| 25750 |
+
{
|
| 25751 |
+
"epoch": 0.6810532200409652,
|
| 25752 |
+
"grad_norm": 34.71875,
|
| 25753 |
+
"learning_rate": 9.89358546522434e-06,
|
| 25754 |
+
"loss": 19.4342,
|
| 25755 |
+
"step": 36700
|
| 25756 |
+
},
|
| 25757 |
+
{
|
| 25758 |
+
"epoch": 0.6812387931254451,
|
| 25759 |
+
"grad_norm": 34.65625,
|
| 25760 |
+
"learning_rate": 9.893556469438296e-06,
|
| 25761 |
+
"loss": 19.4025,
|
| 25762 |
+
"step": 36710
|
| 25763 |
+
},
|
| 25764 |
+
{
|
| 25765 |
+
"epoch": 0.6814243662099249,
|
| 25766 |
+
"grad_norm": 34.09375,
|
| 25767 |
+
"learning_rate": 9.893527473652255e-06,
|
| 25768 |
+
"loss": 19.8077,
|
| 25769 |
+
"step": 36720
|
| 25770 |
+
},
|
| 25771 |
+
{
|
| 25772 |
+
"epoch": 0.6816099392944047,
|
| 25773 |
+
"grad_norm": 34.46875,
|
| 25774 |
+
"learning_rate": 9.893498477866213e-06,
|
| 25775 |
+
"loss": 19.4602,
|
| 25776 |
+
"step": 36730
|
| 25777 |
+
},
|
| 25778 |
+
{
|
| 25779 |
+
"epoch": 0.6817955123788846,
|
| 25780 |
+
"grad_norm": 35.78125,
|
| 25781 |
+
"learning_rate": 9.89346948208017e-06,
|
| 25782 |
+
"loss": 19.6626,
|
| 25783 |
+
"step": 36740
|
| 25784 |
+
},
|
| 25785 |
+
{
|
| 25786 |
+
"epoch": 0.6819810854633644,
|
| 25787 |
+
"grad_norm": 34.3125,
|
| 25788 |
+
"learning_rate": 9.893440486294128e-06,
|
| 25789 |
+
"loss": 19.751,
|
| 25790 |
+
"step": 36750
|
| 25791 |
+
},
|
| 25792 |
+
{
|
| 25793 |
+
"epoch": 0.6821666585478442,
|
| 25794 |
+
"grad_norm": 33.3125,
|
| 25795 |
+
"learning_rate": 9.893411490508085e-06,
|
| 25796 |
+
"loss": 19.3236,
|
| 25797 |
+
"step": 36760
|
| 25798 |
+
},
|
| 25799 |
+
{
|
| 25800 |
+
"epoch": 0.6823522316323241,
|
| 25801 |
+
"grad_norm": 35.15625,
|
| 25802 |
+
"learning_rate": 9.893382494722042e-06,
|
| 25803 |
+
"loss": 19.6032,
|
| 25804 |
+
"step": 36770
|
| 25805 |
+
},
|
| 25806 |
+
{
|
| 25807 |
+
"epoch": 0.6825378047168039,
|
| 25808 |
+
"grad_norm": 36.8125,
|
| 25809 |
+
"learning_rate": 9.893353498936e-06,
|
| 25810 |
+
"loss": 19.9787,
|
| 25811 |
+
"step": 36780
|
| 25812 |
+
},
|
| 25813 |
+
{
|
| 25814 |
+
"epoch": 0.6827233778012837,
|
| 25815 |
+
"grad_norm": 37.0,
|
| 25816 |
+
"learning_rate": 9.893324503149957e-06,
|
| 25817 |
+
"loss": 20.0506,
|
| 25818 |
+
"step": 36790
|
| 25819 |
+
},
|
| 25820 |
+
{
|
| 25821 |
+
"epoch": 0.6829089508857635,
|
| 25822 |
+
"grad_norm": 35.6875,
|
| 25823 |
+
"learning_rate": 9.893295507363916e-06,
|
| 25824 |
+
"loss": 19.2396,
|
| 25825 |
+
"step": 36800
|
| 25826 |
+
},
|
| 25827 |
+
{
|
| 25828 |
+
"epoch": 0.6830945239702434,
|
| 25829 |
+
"grad_norm": 33.75,
|
| 25830 |
+
"learning_rate": 9.893266511577874e-06,
|
| 25831 |
+
"loss": 19.7276,
|
| 25832 |
+
"step": 36810
|
| 25833 |
+
},
|
| 25834 |
+
{
|
| 25835 |
+
"epoch": 0.6832800970547231,
|
| 25836 |
+
"grad_norm": 35.5,
|
| 25837 |
+
"learning_rate": 9.89323751579183e-06,
|
| 25838 |
+
"loss": 19.6616,
|
| 25839 |
+
"step": 36820
|
| 25840 |
+
},
|
| 25841 |
+
{
|
| 25842 |
+
"epoch": 0.683465670139203,
|
| 25843 |
+
"grad_norm": 35.625,
|
| 25844 |
+
"learning_rate": 9.893208520005789e-06,
|
| 25845 |
+
"loss": 19.53,
|
| 25846 |
+
"step": 36830
|
| 25847 |
+
},
|
| 25848 |
+
{
|
| 25849 |
+
"epoch": 0.6836512432236829,
|
| 25850 |
+
"grad_norm": 34.8125,
|
| 25851 |
+
"learning_rate": 9.893179524219746e-06,
|
| 25852 |
+
"loss": 19.5154,
|
| 25853 |
+
"step": 36840
|
| 25854 |
+
},
|
| 25855 |
+
{
|
| 25856 |
+
"epoch": 0.6838368163081626,
|
| 25857 |
+
"grad_norm": 34.53125,
|
| 25858 |
+
"learning_rate": 9.893150528433703e-06,
|
| 25859 |
+
"loss": 19.7291,
|
| 25860 |
+
"step": 36850
|
| 25861 |
+
},
|
| 25862 |
+
{
|
| 25863 |
+
"epoch": 0.6840223893926425,
|
| 25864 |
+
"grad_norm": 35.96875,
|
| 25865 |
+
"learning_rate": 9.89312153264766e-06,
|
| 25866 |
+
"loss": 19.5387,
|
| 25867 |
+
"step": 36860
|
| 25868 |
+
},
|
| 25869 |
+
{
|
| 25870 |
+
"epoch": 0.6842079624771223,
|
| 25871 |
+
"grad_norm": 35.53125,
|
| 25872 |
+
"learning_rate": 9.893092536861618e-06,
|
| 25873 |
+
"loss": 19.7129,
|
| 25874 |
+
"step": 36870
|
| 25875 |
+
},
|
| 25876 |
+
{
|
| 25877 |
+
"epoch": 0.6843935355616021,
|
| 25878 |
+
"grad_norm": 34.71875,
|
| 25879 |
+
"learning_rate": 9.893063541075576e-06,
|
| 25880 |
+
"loss": 19.8325,
|
| 25881 |
+
"step": 36880
|
| 25882 |
+
},
|
| 25883 |
+
{
|
| 25884 |
+
"epoch": 0.684579108646082,
|
| 25885 |
+
"grad_norm": 34.625,
|
| 25886 |
+
"learning_rate": 9.893034545289533e-06,
|
| 25887 |
+
"loss": 19.6929,
|
| 25888 |
+
"step": 36890
|
| 25889 |
+
},
|
| 25890 |
+
{
|
| 25891 |
+
"epoch": 0.6847646817305618,
|
| 25892 |
+
"grad_norm": 37.28125,
|
| 25893 |
+
"learning_rate": 9.893005549503492e-06,
|
| 25894 |
+
"loss": 19.5159,
|
| 25895 |
+
"step": 36900
|
| 25896 |
+
},
|
| 25897 |
+
{
|
| 25898 |
+
"epoch": 0.6849502548150417,
|
| 25899 |
+
"grad_norm": 34.28125,
|
| 25900 |
+
"learning_rate": 9.89297655371745e-06,
|
| 25901 |
+
"loss": 19.5611,
|
| 25902 |
+
"step": 36910
|
| 25903 |
+
},
|
| 25904 |
+
{
|
| 25905 |
+
"epoch": 0.6851358278995214,
|
| 25906 |
+
"grad_norm": 34.59375,
|
| 25907 |
+
"learning_rate": 9.892947557931407e-06,
|
| 25908 |
+
"loss": 19.5881,
|
| 25909 |
+
"step": 36920
|
| 25910 |
+
},
|
| 25911 |
+
{
|
| 25912 |
+
"epoch": 0.6853214009840013,
|
| 25913 |
+
"grad_norm": 35.0625,
|
| 25914 |
+
"learning_rate": 9.892918562145364e-06,
|
| 25915 |
+
"loss": 19.5037,
|
| 25916 |
+
"step": 36930
|
| 25917 |
+
},
|
| 25918 |
+
{
|
| 25919 |
+
"epoch": 0.6855069740684812,
|
| 25920 |
+
"grad_norm": 34.53125,
|
| 25921 |
+
"learning_rate": 9.892889566359322e-06,
|
| 25922 |
+
"loss": 19.7405,
|
| 25923 |
+
"step": 36940
|
| 25924 |
+
},
|
| 25925 |
+
{
|
| 25926 |
+
"epoch": 0.6856925471529609,
|
| 25927 |
+
"grad_norm": 34.96875,
|
| 25928 |
+
"learning_rate": 9.89286057057328e-06,
|
| 25929 |
+
"loss": 19.6976,
|
| 25930 |
+
"step": 36950
|
| 25931 |
+
},
|
| 25932 |
+
{
|
| 25933 |
+
"epoch": 0.6858781202374408,
|
| 25934 |
+
"grad_norm": 33.96875,
|
| 25935 |
+
"learning_rate": 9.892831574787237e-06,
|
| 25936 |
+
"loss": 19.5494,
|
| 25937 |
+
"step": 36960
|
| 25938 |
+
},
|
| 25939 |
+
{
|
| 25940 |
+
"epoch": 0.6860636933219206,
|
| 25941 |
+
"grad_norm": 36.40625,
|
| 25942 |
+
"learning_rate": 9.892802579001196e-06,
|
| 25943 |
+
"loss": 19.4275,
|
| 25944 |
+
"step": 36970
|
| 25945 |
+
},
|
| 25946 |
+
{
|
| 25947 |
+
"epoch": 0.6862492664064004,
|
| 25948 |
+
"grad_norm": 36.03125,
|
| 25949 |
+
"learning_rate": 9.892773583215151e-06,
|
| 25950 |
+
"loss": 19.6989,
|
| 25951 |
+
"step": 36980
|
| 25952 |
+
},
|
| 25953 |
+
{
|
| 25954 |
+
"epoch": 0.6864348394908802,
|
| 25955 |
+
"grad_norm": 37.59375,
|
| 25956 |
+
"learning_rate": 9.892744587429109e-06,
|
| 25957 |
+
"loss": 19.6063,
|
| 25958 |
+
"step": 36990
|
| 25959 |
+
},
|
| 25960 |
+
{
|
| 25961 |
+
"epoch": 0.6866204125753601,
|
| 25962 |
+
"grad_norm": 38.40625,
|
| 25963 |
+
"learning_rate": 9.892715591643068e-06,
|
| 25964 |
+
"loss": 19.5551,
|
| 25965 |
+
"step": 37000
|
| 25966 |
+
},
|
| 25967 |
+
{
|
| 25968 |
+
"epoch": 0.6868059856598399,
|
| 25969 |
+
"grad_norm": 36.21875,
|
| 25970 |
+
"learning_rate": 9.892686595857025e-06,
|
| 25971 |
+
"loss": 19.2527,
|
| 25972 |
+
"step": 37010
|
| 25973 |
+
},
|
| 25974 |
+
{
|
| 25975 |
+
"epoch": 0.6869915587443197,
|
| 25976 |
+
"grad_norm": 36.15625,
|
| 25977 |
+
"learning_rate": 9.892657600070983e-06,
|
| 25978 |
+
"loss": 19.5331,
|
| 25979 |
+
"step": 37020
|
| 25980 |
+
},
|
| 25981 |
+
{
|
| 25982 |
+
"epoch": 0.6871771318287996,
|
| 25983 |
+
"grad_norm": 35.5,
|
| 25984 |
+
"learning_rate": 9.89262860428494e-06,
|
| 25985 |
+
"loss": 19.0556,
|
| 25986 |
+
"step": 37030
|
| 25987 |
+
},
|
| 25988 |
+
{
|
| 25989 |
+
"epoch": 0.6873627049132793,
|
| 25990 |
+
"grad_norm": 34.125,
|
| 25991 |
+
"learning_rate": 9.892599608498898e-06,
|
| 25992 |
+
"loss": 19.6261,
|
| 25993 |
+
"step": 37040
|
| 25994 |
+
},
|
| 25995 |
+
{
|
| 25996 |
+
"epoch": 0.6875482779977592,
|
| 25997 |
+
"grad_norm": 37.3125,
|
| 25998 |
+
"learning_rate": 9.892570612712855e-06,
|
| 25999 |
+
"loss": 19.2994,
|
| 26000 |
+
"step": 37050
|
| 26001 |
+
},
|
| 26002 |
+
{
|
| 26003 |
+
"epoch": 0.6877338510822391,
|
| 26004 |
+
"grad_norm": 37.5625,
|
| 26005 |
+
"learning_rate": 9.892541616926812e-06,
|
| 26006 |
+
"loss": 19.6494,
|
| 26007 |
+
"step": 37060
|
| 26008 |
+
},
|
| 26009 |
+
{
|
| 26010 |
+
"epoch": 0.6879194241667188,
|
| 26011 |
+
"grad_norm": 34.96875,
|
| 26012 |
+
"learning_rate": 9.892512621140771e-06,
|
| 26013 |
+
"loss": 19.6874,
|
| 26014 |
+
"step": 37070
|
| 26015 |
+
},
|
| 26016 |
+
{
|
| 26017 |
+
"epoch": 0.6881049972511987,
|
| 26018 |
+
"grad_norm": 35.125,
|
| 26019 |
+
"learning_rate": 9.892483625354729e-06,
|
| 26020 |
+
"loss": 19.6228,
|
| 26021 |
+
"step": 37080
|
| 26022 |
+
},
|
| 26023 |
+
{
|
| 26024 |
+
"epoch": 0.6882905703356785,
|
| 26025 |
+
"grad_norm": 36.875,
|
| 26026 |
+
"learning_rate": 9.892454629568685e-06,
|
| 26027 |
+
"loss": 19.251,
|
| 26028 |
+
"step": 37090
|
| 26029 |
+
},
|
| 26030 |
+
{
|
| 26031 |
+
"epoch": 0.6884761434201584,
|
| 26032 |
+
"grad_norm": 34.90625,
|
| 26033 |
+
"learning_rate": 9.892425633782644e-06,
|
| 26034 |
+
"loss": 19.8272,
|
| 26035 |
+
"step": 37100
|
| 26036 |
+
},
|
| 26037 |
+
{
|
| 26038 |
+
"epoch": 0.6886617165046381,
|
| 26039 |
+
"grad_norm": 37.8125,
|
| 26040 |
+
"learning_rate": 9.892396637996601e-06,
|
| 26041 |
+
"loss": 19.4844,
|
| 26042 |
+
"step": 37110
|
| 26043 |
+
},
|
| 26044 |
+
{
|
| 26045 |
+
"epoch": 0.688847289589118,
|
| 26046 |
+
"grad_norm": 33.09375,
|
| 26047 |
+
"learning_rate": 9.892367642210558e-06,
|
| 26048 |
+
"loss": 19.3807,
|
| 26049 |
+
"step": 37120
|
| 26050 |
+
},
|
| 26051 |
+
{
|
| 26052 |
+
"epoch": 0.6890328626735979,
|
| 26053 |
+
"grad_norm": 32.59375,
|
| 26054 |
+
"learning_rate": 9.892338646424516e-06,
|
| 26055 |
+
"loss": 19.1829,
|
| 26056 |
+
"step": 37130
|
| 26057 |
+
},
|
| 26058 |
+
{
|
| 26059 |
+
"epoch": 0.6892184357580776,
|
| 26060 |
+
"grad_norm": 34.65625,
|
| 26061 |
+
"learning_rate": 9.892309650638473e-06,
|
| 26062 |
+
"loss": 19.678,
|
| 26063 |
+
"step": 37140
|
| 26064 |
+
},
|
| 26065 |
+
{
|
| 26066 |
+
"epoch": 0.6894040088425575,
|
| 26067 |
+
"grad_norm": 34.34375,
|
| 26068 |
+
"learning_rate": 9.89228065485243e-06,
|
| 26069 |
+
"loss": 20.2141,
|
| 26070 |
+
"step": 37150
|
| 26071 |
+
},
|
| 26072 |
+
{
|
| 26073 |
+
"epoch": 0.6895895819270373,
|
| 26074 |
+
"grad_norm": 35.125,
|
| 26075 |
+
"learning_rate": 9.892251659066388e-06,
|
| 26076 |
+
"loss": 19.9404,
|
| 26077 |
+
"step": 37160
|
| 26078 |
+
},
|
| 26079 |
+
{
|
| 26080 |
+
"epoch": 0.6897751550115171,
|
| 26081 |
+
"grad_norm": 34.71875,
|
| 26082 |
+
"learning_rate": 9.892222663280347e-06,
|
| 26083 |
+
"loss": 19.518,
|
| 26084 |
+
"step": 37170
|
| 26085 |
+
},
|
| 26086 |
+
{
|
| 26087 |
+
"epoch": 0.689960728095997,
|
| 26088 |
+
"grad_norm": 35.0625,
|
| 26089 |
+
"learning_rate": 9.892193667494305e-06,
|
| 26090 |
+
"loss": 19.57,
|
| 26091 |
+
"step": 37180
|
| 26092 |
+
},
|
| 26093 |
+
{
|
| 26094 |
+
"epoch": 0.6901463011804768,
|
| 26095 |
+
"grad_norm": 35.46875,
|
| 26096 |
+
"learning_rate": 9.892164671708262e-06,
|
| 26097 |
+
"loss": 19.5693,
|
| 26098 |
+
"step": 37190
|
| 26099 |
+
},
|
| 26100 |
+
{
|
| 26101 |
+
"epoch": 0.6903318742649566,
|
| 26102 |
+
"grad_norm": 35.59375,
|
| 26103 |
+
"learning_rate": 9.89213567592222e-06,
|
| 26104 |
+
"loss": 19.3073,
|
| 26105 |
+
"step": 37200
|
| 26106 |
+
},
|
| 26107 |
+
{
|
| 26108 |
+
"epoch": 0.6905174473494364,
|
| 26109 |
+
"grad_norm": 35.8125,
|
| 26110 |
+
"learning_rate": 9.892106680136177e-06,
|
| 26111 |
+
"loss": 19.3111,
|
| 26112 |
+
"step": 37210
|
| 26113 |
+
},
|
| 26114 |
+
{
|
| 26115 |
+
"epoch": 0.6907030204339163,
|
| 26116 |
+
"grad_norm": 34.46875,
|
| 26117 |
+
"learning_rate": 9.892077684350134e-06,
|
| 26118 |
+
"loss": 19.6284,
|
| 26119 |
+
"step": 37220
|
| 26120 |
+
},
|
| 26121 |
+
{
|
| 26122 |
+
"epoch": 0.690888593518396,
|
| 26123 |
+
"grad_norm": 35.1875,
|
| 26124 |
+
"learning_rate": 9.892048688564092e-06,
|
| 26125 |
+
"loss": 19.6832,
|
| 26126 |
+
"step": 37230
|
| 26127 |
+
},
|
| 26128 |
+
{
|
| 26129 |
+
"epoch": 0.6910741666028759,
|
| 26130 |
+
"grad_norm": 37.375,
|
| 26131 |
+
"learning_rate": 9.892019692778049e-06,
|
| 26132 |
+
"loss": 19.5748,
|
| 26133 |
+
"step": 37240
|
| 26134 |
+
},
|
| 26135 |
+
{
|
| 26136 |
+
"epoch": 0.6912597396873558,
|
| 26137 |
+
"grad_norm": 35.28125,
|
| 26138 |
+
"learning_rate": 9.891990696992006e-06,
|
| 26139 |
+
"loss": 19.7095,
|
| 26140 |
+
"step": 37250
|
| 26141 |
+
},
|
| 26142 |
+
{
|
| 26143 |
+
"epoch": 0.6914453127718355,
|
| 26144 |
+
"grad_norm": 35.15625,
|
| 26145 |
+
"learning_rate": 9.891961701205964e-06,
|
| 26146 |
+
"loss": 19.378,
|
| 26147 |
+
"step": 37260
|
| 26148 |
+
},
|
| 26149 |
+
{
|
| 26150 |
+
"epoch": 0.6916308858563154,
|
| 26151 |
+
"grad_norm": 34.625,
|
| 26152 |
+
"learning_rate": 9.891932705419921e-06,
|
| 26153 |
+
"loss": 19.7614,
|
| 26154 |
+
"step": 37270
|
| 26155 |
+
},
|
| 26156 |
+
{
|
| 26157 |
+
"epoch": 0.6918164589407952,
|
| 26158 |
+
"grad_norm": 37.125,
|
| 26159 |
+
"learning_rate": 9.89190370963388e-06,
|
| 26160 |
+
"loss": 19.5859,
|
| 26161 |
+
"step": 37280
|
| 26162 |
+
},
|
| 26163 |
+
{
|
| 26164 |
+
"epoch": 0.6920020320252751,
|
| 26165 |
+
"grad_norm": 35.71875,
|
| 26166 |
+
"learning_rate": 9.891874713847838e-06,
|
| 26167 |
+
"loss": 19.326,
|
| 26168 |
+
"step": 37290
|
| 26169 |
+
},
|
| 26170 |
+
{
|
| 26171 |
+
"epoch": 0.6921876051097549,
|
| 26172 |
+
"grad_norm": 35.03125,
|
| 26173 |
+
"learning_rate": 9.891845718061795e-06,
|
| 26174 |
+
"loss": 19.6066,
|
| 26175 |
+
"step": 37300
|
| 26176 |
+
},
|
| 26177 |
+
{
|
| 26178 |
+
"epoch": 0.6923731781942347,
|
| 26179 |
+
"grad_norm": 38.125,
|
| 26180 |
+
"learning_rate": 9.891816722275753e-06,
|
| 26181 |
+
"loss": 19.2031,
|
| 26182 |
+
"step": 37310
|
| 26183 |
+
},
|
| 26184 |
+
{
|
| 26185 |
+
"epoch": 0.6925587512787146,
|
| 26186 |
+
"grad_norm": 34.5,
|
| 26187 |
+
"learning_rate": 9.89178772648971e-06,
|
| 26188 |
+
"loss": 19.5362,
|
| 26189 |
+
"step": 37320
|
| 26190 |
+
},
|
| 26191 |
+
{
|
| 26192 |
+
"epoch": 0.6927443243631943,
|
| 26193 |
+
"grad_norm": 37.3125,
|
| 26194 |
+
"learning_rate": 9.891758730703667e-06,
|
| 26195 |
+
"loss": 19.9289,
|
| 26196 |
+
"step": 37330
|
| 26197 |
+
},
|
| 26198 |
+
{
|
| 26199 |
+
"epoch": 0.6929298974476742,
|
| 26200 |
+
"grad_norm": 35.65625,
|
| 26201 |
+
"learning_rate": 9.891729734917625e-06,
|
| 26202 |
+
"loss": 19.7964,
|
| 26203 |
+
"step": 37340
|
| 26204 |
+
},
|
| 26205 |
+
{
|
| 26206 |
+
"epoch": 0.693115470532154,
|
| 26207 |
+
"grad_norm": 36.75,
|
| 26208 |
+
"learning_rate": 9.891700739131584e-06,
|
| 26209 |
+
"loss": 19.2571,
|
| 26210 |
+
"step": 37350
|
| 26211 |
+
},
|
| 26212 |
+
{
|
| 26213 |
+
"epoch": 0.6933010436166338,
|
| 26214 |
+
"grad_norm": 35.1875,
|
| 26215 |
+
"learning_rate": 9.89167174334554e-06,
|
| 26216 |
+
"loss": 19.4191,
|
| 26217 |
+
"step": 37360
|
| 26218 |
+
},
|
| 26219 |
+
{
|
| 26220 |
+
"epoch": 0.6934866167011137,
|
| 26221 |
+
"grad_norm": 34.5625,
|
| 26222 |
+
"learning_rate": 9.891642747559497e-06,
|
| 26223 |
+
"loss": 19.1347,
|
| 26224 |
+
"step": 37370
|
| 26225 |
+
},
|
| 26226 |
+
{
|
| 26227 |
+
"epoch": 0.6936721897855935,
|
| 26228 |
+
"grad_norm": 38.59375,
|
| 26229 |
+
"learning_rate": 9.891613751773456e-06,
|
| 26230 |
+
"loss": 19.4612,
|
| 26231 |
+
"step": 37380
|
| 26232 |
+
},
|
| 26233 |
+
{
|
| 26234 |
+
"epoch": 0.6938577628700733,
|
| 26235 |
+
"grad_norm": 35.25,
|
| 26236 |
+
"learning_rate": 9.891584755987414e-06,
|
| 26237 |
+
"loss": 19.9225,
|
| 26238 |
+
"step": 37390
|
| 26239 |
+
},
|
| 26240 |
+
{
|
| 26241 |
+
"epoch": 0.6940433359545531,
|
| 26242 |
+
"grad_norm": 34.5625,
|
| 26243 |
+
"learning_rate": 9.891555760201371e-06,
|
| 26244 |
+
"loss": 19.8018,
|
| 26245 |
+
"step": 37400
|
| 26246 |
+
},
|
| 26247 |
+
{
|
| 26248 |
+
"epoch": 0.694228909039033,
|
| 26249 |
+
"grad_norm": 35.375,
|
| 26250 |
+
"learning_rate": 9.891526764415328e-06,
|
| 26251 |
+
"loss": 19.4325,
|
| 26252 |
+
"step": 37410
|
| 26253 |
+
},
|
| 26254 |
+
{
|
| 26255 |
+
"epoch": 0.6944144821235128,
|
| 26256 |
+
"grad_norm": 33.3125,
|
| 26257 |
+
"learning_rate": 9.891497768629286e-06,
|
| 26258 |
+
"loss": 19.7925,
|
| 26259 |
+
"step": 37420
|
| 26260 |
+
},
|
| 26261 |
+
{
|
| 26262 |
+
"epoch": 0.6946000552079926,
|
| 26263 |
+
"grad_norm": 37.28125,
|
| 26264 |
+
"learning_rate": 9.891468772843243e-06,
|
| 26265 |
+
"loss": 19.9303,
|
| 26266 |
+
"step": 37430
|
| 26267 |
+
},
|
| 26268 |
+
{
|
| 26269 |
+
"epoch": 0.6947856282924725,
|
| 26270 |
+
"grad_norm": 35.46875,
|
| 26271 |
+
"learning_rate": 9.8914397770572e-06,
|
| 26272 |
+
"loss": 19.8091,
|
| 26273 |
+
"step": 37440
|
| 26274 |
+
},
|
| 26275 |
+
{
|
| 26276 |
+
"epoch": 0.6949712013769523,
|
| 26277 |
+
"grad_norm": 37.6875,
|
| 26278 |
+
"learning_rate": 9.89141078127116e-06,
|
| 26279 |
+
"loss": 19.6351,
|
| 26280 |
+
"step": 37450
|
| 26281 |
+
},
|
| 26282 |
+
{
|
| 26283 |
+
"epoch": 0.6951567744614321,
|
| 26284 |
+
"grad_norm": 35.90625,
|
| 26285 |
+
"learning_rate": 9.891381785485115e-06,
|
| 26286 |
+
"loss": 19.8791,
|
| 26287 |
+
"step": 37460
|
| 26288 |
+
},
|
| 26289 |
+
{
|
| 26290 |
+
"epoch": 0.695342347545912,
|
| 26291 |
+
"grad_norm": 36.34375,
|
| 26292 |
+
"learning_rate": 9.891352789699073e-06,
|
| 26293 |
+
"loss": 19.6898,
|
| 26294 |
+
"step": 37470
|
| 26295 |
+
},
|
| 26296 |
+
{
|
| 26297 |
+
"epoch": 0.6955279206303918,
|
| 26298 |
+
"grad_norm": 35.6875,
|
| 26299 |
+
"learning_rate": 9.891323793913032e-06,
|
| 26300 |
+
"loss": 19.9378,
|
| 26301 |
+
"step": 37480
|
| 26302 |
+
},
|
| 26303 |
+
{
|
| 26304 |
+
"epoch": 0.6957134937148716,
|
| 26305 |
+
"grad_norm": 36.59375,
|
| 26306 |
+
"learning_rate": 9.89129479812699e-06,
|
| 26307 |
+
"loss": 19.562,
|
| 26308 |
+
"step": 37490
|
| 26309 |
+
},
|
| 26310 |
+
{
|
| 26311 |
+
"epoch": 0.6958990667993514,
|
| 26312 |
+
"grad_norm": 36.21875,
|
| 26313 |
+
"learning_rate": 9.891265802340947e-06,
|
| 26314 |
+
"loss": 19.947,
|
| 26315 |
+
"step": 37500
|
| 26316 |
+
},
|
| 26317 |
+
{
|
| 26318 |
+
"epoch": 0.6960846398838313,
|
| 26319 |
+
"grad_norm": 35.0625,
|
| 26320 |
+
"learning_rate": 9.891236806554904e-06,
|
| 26321 |
+
"loss": 19.8907,
|
| 26322 |
+
"step": 37510
|
| 26323 |
+
},
|
| 26324 |
+
{
|
| 26325 |
+
"epoch": 0.696270212968311,
|
| 26326 |
+
"grad_norm": 36.71875,
|
| 26327 |
+
"learning_rate": 9.891207810768862e-06,
|
| 26328 |
+
"loss": 20.0342,
|
| 26329 |
+
"step": 37520
|
| 26330 |
+
},
|
| 26331 |
+
{
|
| 26332 |
+
"epoch": 0.6964557860527909,
|
| 26333 |
+
"grad_norm": 35.09375,
|
| 26334 |
+
"learning_rate": 9.891178814982819e-06,
|
| 26335 |
+
"loss": 19.3272,
|
| 26336 |
+
"step": 37530
|
| 26337 |
+
},
|
| 26338 |
+
{
|
| 26339 |
+
"epoch": 0.6966413591372708,
|
| 26340 |
+
"grad_norm": 36.34375,
|
| 26341 |
+
"learning_rate": 9.891149819196776e-06,
|
| 26342 |
+
"loss": 19.3683,
|
| 26343 |
+
"step": 37540
|
| 26344 |
+
},
|
| 26345 |
+
{
|
| 26346 |
+
"epoch": 0.6968269322217505,
|
| 26347 |
+
"grad_norm": 36.25,
|
| 26348 |
+
"learning_rate": 9.891120823410735e-06,
|
| 26349 |
+
"loss": 19.7305,
|
| 26350 |
+
"step": 37550
|
| 26351 |
+
},
|
| 26352 |
+
{
|
| 26353 |
+
"epoch": 0.6970125053062304,
|
| 26354 |
+
"grad_norm": 35.25,
|
| 26355 |
+
"learning_rate": 9.891091827624693e-06,
|
| 26356 |
+
"loss": 19.2455,
|
| 26357 |
+
"step": 37560
|
| 26358 |
+
},
|
| 26359 |
+
{
|
| 26360 |
+
"epoch": 0.6971980783907102,
|
| 26361 |
+
"grad_norm": 37.25,
|
| 26362 |
+
"learning_rate": 9.891062831838649e-06,
|
| 26363 |
+
"loss": 19.4296,
|
| 26364 |
+
"step": 37570
|
| 26365 |
+
},
|
| 26366 |
+
{
|
| 26367 |
+
"epoch": 0.69738365147519,
|
| 26368 |
+
"grad_norm": 35.84375,
|
| 26369 |
+
"learning_rate": 9.891033836052608e-06,
|
| 26370 |
+
"loss": 20.02,
|
| 26371 |
+
"step": 37580
|
| 26372 |
+
},
|
| 26373 |
+
{
|
| 26374 |
+
"epoch": 0.6975692245596699,
|
| 26375 |
+
"grad_norm": 34.3125,
|
| 26376 |
+
"learning_rate": 9.891004840266565e-06,
|
| 26377 |
+
"loss": 19.2752,
|
| 26378 |
+
"step": 37590
|
| 26379 |
+
},
|
| 26380 |
+
{
|
| 26381 |
+
"epoch": 0.6977547976441497,
|
| 26382 |
+
"grad_norm": 36.21875,
|
| 26383 |
+
"learning_rate": 9.890975844480523e-06,
|
| 26384 |
+
"loss": 19.6226,
|
| 26385 |
+
"step": 37600
|
| 26386 |
+
},
|
| 26387 |
+
{
|
| 26388 |
+
"epoch": 0.6979403707286295,
|
| 26389 |
+
"grad_norm": 36.1875,
|
| 26390 |
+
"learning_rate": 9.89094684869448e-06,
|
| 26391 |
+
"loss": 19.3974,
|
| 26392 |
+
"step": 37610
|
| 26393 |
+
},
|
| 26394 |
+
{
|
| 26395 |
+
"epoch": 0.6981259438131093,
|
| 26396 |
+
"grad_norm": 34.3125,
|
| 26397 |
+
"learning_rate": 9.890917852908437e-06,
|
| 26398 |
+
"loss": 19.5617,
|
| 26399 |
+
"step": 37620
|
| 26400 |
+
},
|
| 26401 |
+
{
|
| 26402 |
+
"epoch": 0.6983115168975892,
|
| 26403 |
+
"grad_norm": 34.46875,
|
| 26404 |
+
"learning_rate": 9.890888857122395e-06,
|
| 26405 |
+
"loss": 19.4508,
|
| 26406 |
+
"step": 37630
|
| 26407 |
+
},
|
| 26408 |
+
{
|
| 26409 |
+
"epoch": 0.698497089982069,
|
| 26410 |
+
"grad_norm": 35.4375,
|
| 26411 |
+
"learning_rate": 9.890859861336352e-06,
|
| 26412 |
+
"loss": 19.2594,
|
| 26413 |
+
"step": 37640
|
| 26414 |
+
},
|
| 26415 |
+
{
|
| 26416 |
+
"epoch": 0.6986826630665488,
|
| 26417 |
+
"grad_norm": 36.59375,
|
| 26418 |
+
"learning_rate": 9.890830865550311e-06,
|
| 26419 |
+
"loss": 19.4359,
|
| 26420 |
+
"step": 37650
|
| 26421 |
+
},
|
| 26422 |
+
{
|
| 26423 |
+
"epoch": 0.6988682361510287,
|
| 26424 |
+
"grad_norm": 34.53125,
|
| 26425 |
+
"learning_rate": 9.890801869764269e-06,
|
| 26426 |
+
"loss": 19.662,
|
| 26427 |
+
"step": 37660
|
| 26428 |
+
},
|
| 26429 |
+
{
|
| 26430 |
+
"epoch": 0.6990538092355085,
|
| 26431 |
+
"grad_norm": 34.6875,
|
| 26432 |
+
"learning_rate": 9.890772873978226e-06,
|
| 26433 |
+
"loss": 19.5305,
|
| 26434 |
+
"step": 37670
|
| 26435 |
+
},
|
| 26436 |
+
{
|
| 26437 |
+
"epoch": 0.6992393823199883,
|
| 26438 |
+
"grad_norm": 34.09375,
|
| 26439 |
+
"learning_rate": 9.890743878192183e-06,
|
| 26440 |
+
"loss": 19.688,
|
| 26441 |
+
"step": 37680
|
| 26442 |
+
},
|
| 26443 |
+
{
|
| 26444 |
+
"epoch": 0.6994249554044681,
|
| 26445 |
+
"grad_norm": 35.15625,
|
| 26446 |
+
"learning_rate": 9.890714882406141e-06,
|
| 26447 |
+
"loss": 19.4327,
|
| 26448 |
+
"step": 37690
|
| 26449 |
+
},
|
| 26450 |
+
{
|
| 26451 |
+
"epoch": 0.699610528488948,
|
| 26452 |
+
"grad_norm": 36.375,
|
| 26453 |
+
"learning_rate": 9.890685886620098e-06,
|
| 26454 |
+
"loss": 19.6052,
|
| 26455 |
+
"step": 37700
|
| 26456 |
+
},
|
| 26457 |
+
{
|
| 26458 |
+
"epoch": 0.6997961015734278,
|
| 26459 |
+
"grad_norm": 34.125,
|
| 26460 |
+
"learning_rate": 9.890656890834056e-06,
|
| 26461 |
+
"loss": 19.9092,
|
| 26462 |
+
"step": 37710
|
| 26463 |
+
},
|
| 26464 |
+
{
|
| 26465 |
+
"epoch": 0.6999816746579076,
|
| 26466 |
+
"grad_norm": 35.78125,
|
| 26467 |
+
"learning_rate": 9.890627895048013e-06,
|
| 26468 |
+
"loss": 20.0716,
|
| 26469 |
+
"step": 37720
|
| 26470 |
+
},
|
| 26471 |
+
{
|
| 26472 |
+
"epoch": 0.7001672477423875,
|
| 26473 |
+
"grad_norm": 35.90625,
|
| 26474 |
+
"learning_rate": 9.89059889926197e-06,
|
| 26475 |
+
"loss": 19.438,
|
| 26476 |
+
"step": 37730
|
| 26477 |
+
},
|
| 26478 |
+
{
|
| 26479 |
+
"epoch": 0.7003528208268672,
|
| 26480 |
+
"grad_norm": 35.8125,
|
| 26481 |
+
"learning_rate": 9.890569903475928e-06,
|
| 26482 |
+
"loss": 19.2955,
|
| 26483 |
+
"step": 37740
|
| 26484 |
+
},
|
| 26485 |
+
{
|
| 26486 |
+
"epoch": 0.7005383939113471,
|
| 26487 |
+
"grad_norm": 37.78125,
|
| 26488 |
+
"learning_rate": 9.890540907689887e-06,
|
| 26489 |
+
"loss": 19.7556,
|
| 26490 |
+
"step": 37750
|
| 26491 |
+
},
|
| 26492 |
+
{
|
| 26493 |
+
"epoch": 0.700723966995827,
|
| 26494 |
+
"grad_norm": 34.5625,
|
| 26495 |
+
"learning_rate": 9.890511911903844e-06,
|
| 26496 |
+
"loss": 19.8376,
|
| 26497 |
+
"step": 37760
|
| 26498 |
+
},
|
| 26499 |
+
{
|
| 26500 |
+
"epoch": 0.7009095400803067,
|
| 26501 |
+
"grad_norm": 34.65625,
|
| 26502 |
+
"learning_rate": 9.890482916117802e-06,
|
| 26503 |
+
"loss": 19.174,
|
| 26504 |
+
"step": 37770
|
| 26505 |
+
},
|
| 26506 |
+
{
|
| 26507 |
+
"epoch": 0.7010951131647866,
|
| 26508 |
+
"grad_norm": 36.3125,
|
| 26509 |
+
"learning_rate": 9.89045392033176e-06,
|
| 26510 |
+
"loss": 19.5626,
|
| 26511 |
+
"step": 37780
|
| 26512 |
+
},
|
| 26513 |
+
{
|
| 26514 |
+
"epoch": 0.7012806862492664,
|
| 26515 |
+
"grad_norm": 35.0625,
|
| 26516 |
+
"learning_rate": 9.890424924545717e-06,
|
| 26517 |
+
"loss": 19.6777,
|
| 26518 |
+
"step": 37790
|
| 26519 |
+
},
|
| 26520 |
+
{
|
| 26521 |
+
"epoch": 0.7014662593337462,
|
| 26522 |
+
"grad_norm": 34.28125,
|
| 26523 |
+
"learning_rate": 9.890395928759674e-06,
|
| 26524 |
+
"loss": 19.4296,
|
| 26525 |
+
"step": 37800
|
| 26526 |
+
},
|
| 26527 |
+
{
|
| 26528 |
+
"epoch": 0.701651832418226,
|
| 26529 |
+
"grad_norm": 35.125,
|
| 26530 |
+
"learning_rate": 9.890366932973631e-06,
|
| 26531 |
+
"loss": 19.8895,
|
| 26532 |
+
"step": 37810
|
| 26533 |
+
},
|
| 26534 |
+
{
|
| 26535 |
+
"epoch": 0.7018374055027059,
|
| 26536 |
+
"grad_norm": 35.40625,
|
| 26537 |
+
"learning_rate": 9.890337937187589e-06,
|
| 26538 |
+
"loss": 19.6531,
|
| 26539 |
+
"step": 37820
|
| 26540 |
+
},
|
| 26541 |
+
{
|
| 26542 |
+
"epoch": 0.7020229785871858,
|
| 26543 |
+
"grad_norm": 34.6875,
|
| 26544 |
+
"learning_rate": 9.890308941401548e-06,
|
| 26545 |
+
"loss": 20.0812,
|
| 26546 |
+
"step": 37830
|
| 26547 |
+
},
|
| 26548 |
+
{
|
| 26549 |
+
"epoch": 0.7022085516716655,
|
| 26550 |
+
"grad_norm": 37.1875,
|
| 26551 |
+
"learning_rate": 9.890279945615504e-06,
|
| 26552 |
+
"loss": 19.4826,
|
| 26553 |
+
"step": 37840
|
| 26554 |
+
},
|
| 26555 |
+
{
|
| 26556 |
+
"epoch": 0.7023941247561454,
|
| 26557 |
+
"grad_norm": 35.1875,
|
| 26558 |
+
"learning_rate": 9.890250949829461e-06,
|
| 26559 |
+
"loss": 19.8685,
|
| 26560 |
+
"step": 37850
|
| 26561 |
+
},
|
| 26562 |
+
{
|
| 26563 |
+
"epoch": 0.7025796978406252,
|
| 26564 |
+
"grad_norm": 36.9375,
|
| 26565 |
+
"learning_rate": 9.89022195404342e-06,
|
| 26566 |
+
"loss": 20.043,
|
| 26567 |
+
"step": 37860
|
| 26568 |
+
},
|
| 26569 |
+
{
|
| 26570 |
+
"epoch": 0.702765270925105,
|
| 26571 |
+
"grad_norm": 37.09375,
|
| 26572 |
+
"learning_rate": 9.890192958257378e-06,
|
| 26573 |
+
"loss": 19.6171,
|
| 26574 |
+
"step": 37870
|
| 26575 |
+
},
|
| 26576 |
+
{
|
| 26577 |
+
"epoch": 0.7029508440095849,
|
| 26578 |
+
"grad_norm": 36.03125,
|
| 26579 |
+
"learning_rate": 9.890163962471335e-06,
|
| 26580 |
+
"loss": 19.0865,
|
| 26581 |
+
"step": 37880
|
| 26582 |
+
},
|
| 26583 |
+
{
|
| 26584 |
+
"epoch": 0.7031364170940647,
|
| 26585 |
+
"grad_norm": 35.09375,
|
| 26586 |
+
"learning_rate": 9.890134966685292e-06,
|
| 26587 |
+
"loss": 19.6744,
|
| 26588 |
+
"step": 37890
|
| 26589 |
+
},
|
| 26590 |
+
{
|
| 26591 |
+
"epoch": 0.7033219901785445,
|
| 26592 |
+
"grad_norm": 35.96875,
|
| 26593 |
+
"learning_rate": 9.89010597089925e-06,
|
| 26594 |
+
"loss": 19.748,
|
| 26595 |
+
"step": 37900
|
| 26596 |
+
},
|
| 26597 |
+
{
|
| 26598 |
+
"epoch": 0.7035075632630243,
|
| 26599 |
+
"grad_norm": 36.1875,
|
| 26600 |
+
"learning_rate": 9.890076975113207e-06,
|
| 26601 |
+
"loss": 19.6734,
|
| 26602 |
+
"step": 37910
|
| 26603 |
+
},
|
| 26604 |
+
{
|
| 26605 |
+
"epoch": 0.7036931363475042,
|
| 26606 |
+
"grad_norm": 33.34375,
|
| 26607 |
+
"learning_rate": 9.890047979327165e-06,
|
| 26608 |
+
"loss": 19.5977,
|
| 26609 |
+
"step": 37920
|
| 26610 |
+
},
|
| 26611 |
+
{
|
| 26612 |
+
"epoch": 0.7038787094319839,
|
| 26613 |
+
"grad_norm": 35.65625,
|
| 26614 |
+
"learning_rate": 9.890018983541124e-06,
|
| 26615 |
+
"loss": 19.6002,
|
| 26616 |
+
"step": 37930
|
| 26617 |
+
},
|
| 26618 |
+
{
|
| 26619 |
+
"epoch": 0.7040642825164638,
|
| 26620 |
+
"grad_norm": 36.375,
|
| 26621 |
+
"learning_rate": 9.889989987755081e-06,
|
| 26622 |
+
"loss": 19.5954,
|
| 26623 |
+
"step": 37940
|
| 26624 |
+
},
|
| 26625 |
+
{
|
| 26626 |
+
"epoch": 0.7042498556009437,
|
| 26627 |
+
"grad_norm": 34.375,
|
| 26628 |
+
"learning_rate": 9.889960991969037e-06,
|
| 26629 |
+
"loss": 19.6573,
|
| 26630 |
+
"step": 37950
|
| 26631 |
+
},
|
| 26632 |
+
{
|
| 26633 |
+
"epoch": 0.7044354286854234,
|
| 26634 |
+
"grad_norm": 36.5625,
|
| 26635 |
+
"learning_rate": 9.889931996182996e-06,
|
| 26636 |
+
"loss": 19.284,
|
| 26637 |
+
"step": 37960
|
| 26638 |
+
},
|
| 26639 |
+
{
|
| 26640 |
+
"epoch": 0.7046210017699033,
|
| 26641 |
+
"grad_norm": 34.125,
|
| 26642 |
+
"learning_rate": 9.889903000396953e-06,
|
| 26643 |
+
"loss": 19.5239,
|
| 26644 |
+
"step": 37970
|
| 26645 |
+
},
|
| 26646 |
+
{
|
| 26647 |
+
"epoch": 0.7048065748543831,
|
| 26648 |
+
"grad_norm": 34.09375,
|
| 26649 |
+
"learning_rate": 9.88987400461091e-06,
|
| 26650 |
+
"loss": 19.323,
|
| 26651 |
+
"step": 37980
|
| 26652 |
+
},
|
| 26653 |
+
{
|
| 26654 |
+
"epoch": 0.704992147938863,
|
| 26655 |
+
"grad_norm": 35.65625,
|
| 26656 |
+
"learning_rate": 9.889845008824868e-06,
|
| 26657 |
+
"loss": 18.7075,
|
| 26658 |
+
"step": 37990
|
| 26659 |
+
},
|
| 26660 |
+
{
|
| 26661 |
+
"epoch": 0.7051777210233428,
|
| 26662 |
+
"grad_norm": 35.59375,
|
| 26663 |
+
"learning_rate": 9.889816013038826e-06,
|
| 26664 |
+
"loss": 19.7255,
|
| 26665 |
+
"step": 38000
|
| 26666 |
+
},
|
| 26667 |
+
{
|
| 26668 |
+
"epoch": 0.7053632941078226,
|
| 26669 |
+
"grad_norm": 36.0625,
|
| 26670 |
+
"learning_rate": 9.889787017252783e-06,
|
| 26671 |
+
"loss": 19.4898,
|
| 26672 |
+
"step": 38010
|
| 26673 |
+
},
|
| 26674 |
+
{
|
| 26675 |
+
"epoch": 0.7055488671923025,
|
| 26676 |
+
"grad_norm": 36.0625,
|
| 26677 |
+
"learning_rate": 9.88975802146674e-06,
|
| 26678 |
+
"loss": 19.3605,
|
| 26679 |
+
"step": 38020
|
| 26680 |
+
},
|
| 26681 |
+
{
|
| 26682 |
+
"epoch": 0.7057344402767822,
|
| 26683 |
+
"grad_norm": 33.9375,
|
| 26684 |
+
"learning_rate": 9.8897290256807e-06,
|
| 26685 |
+
"loss": 19.6154,
|
| 26686 |
+
"step": 38030
|
| 26687 |
+
},
|
| 26688 |
+
{
|
| 26689 |
+
"epoch": 0.7059200133612621,
|
| 26690 |
+
"grad_norm": 35.65625,
|
| 26691 |
+
"learning_rate": 9.889700029894657e-06,
|
| 26692 |
+
"loss": 19.6784,
|
| 26693 |
+
"step": 38040
|
| 26694 |
+
},
|
| 26695 |
+
{
|
| 26696 |
+
"epoch": 0.706105586445742,
|
| 26697 |
+
"grad_norm": 35.15625,
|
| 26698 |
+
"learning_rate": 9.889671034108613e-06,
|
| 26699 |
+
"loss": 19.6619,
|
| 26700 |
+
"step": 38050
|
| 26701 |
+
},
|
| 26702 |
+
{
|
| 26703 |
+
"epoch": 0.7062911595302217,
|
| 26704 |
+
"grad_norm": 35.84375,
|
| 26705 |
+
"learning_rate": 9.889642038322572e-06,
|
| 26706 |
+
"loss": 18.8527,
|
| 26707 |
+
"step": 38060
|
| 26708 |
+
},
|
| 26709 |
+
{
|
| 26710 |
+
"epoch": 0.7064767326147016,
|
| 26711 |
+
"grad_norm": 34.5,
|
| 26712 |
+
"learning_rate": 9.889613042536529e-06,
|
| 26713 |
+
"loss": 19.0215,
|
| 26714 |
+
"step": 38070
|
| 26715 |
+
},
|
| 26716 |
+
{
|
| 26717 |
+
"epoch": 0.7066623056991814,
|
| 26718 |
+
"grad_norm": 34.90625,
|
| 26719 |
+
"learning_rate": 9.889584046750487e-06,
|
| 26720 |
+
"loss": 19.3586,
|
| 26721 |
+
"step": 38080
|
| 26722 |
+
},
|
| 26723 |
+
{
|
| 26724 |
+
"epoch": 0.7068478787836612,
|
| 26725 |
+
"grad_norm": 37.1875,
|
| 26726 |
+
"learning_rate": 9.889555050964444e-06,
|
| 26727 |
+
"loss": 19.8383,
|
| 26728 |
+
"step": 38090
|
| 26729 |
+
},
|
| 26730 |
+
{
|
| 26731 |
+
"epoch": 0.707033451868141,
|
| 26732 |
+
"grad_norm": 35.8125,
|
| 26733 |
+
"learning_rate": 9.889526055178403e-06,
|
| 26734 |
+
"loss": 19.9191,
|
| 26735 |
+
"step": 38100
|
| 26736 |
+
},
|
| 26737 |
+
{
|
| 26738 |
+
"epoch": 0.7072190249526209,
|
| 26739 |
+
"grad_norm": 35.875,
|
| 26740 |
+
"learning_rate": 9.889497059392359e-06,
|
| 26741 |
+
"loss": 19.1323,
|
| 26742 |
+
"step": 38110
|
| 26743 |
+
},
|
| 26744 |
+
{
|
| 26745 |
+
"epoch": 0.7074045980371007,
|
| 26746 |
+
"grad_norm": 36.0,
|
| 26747 |
+
"learning_rate": 9.889468063606316e-06,
|
| 26748 |
+
"loss": 20.3555,
|
| 26749 |
+
"step": 38120
|
| 26750 |
+
},
|
| 26751 |
+
{
|
| 26752 |
+
"epoch": 0.7075901711215805,
|
| 26753 |
+
"grad_norm": 36.1875,
|
| 26754 |
+
"learning_rate": 9.889439067820275e-06,
|
| 26755 |
+
"loss": 19.5806,
|
| 26756 |
+
"step": 38130
|
| 26757 |
+
},
|
| 26758 |
+
{
|
| 26759 |
+
"epoch": 0.7077757442060604,
|
| 26760 |
+
"grad_norm": 36.25,
|
| 26761 |
+
"learning_rate": 9.889410072034233e-06,
|
| 26762 |
+
"loss": 19.4524,
|
| 26763 |
+
"step": 38140
|
| 26764 |
+
},
|
| 26765 |
+
{
|
| 26766 |
+
"epoch": 0.7079613172905401,
|
| 26767 |
+
"grad_norm": 34.3125,
|
| 26768 |
+
"learning_rate": 9.88938107624819e-06,
|
| 26769 |
+
"loss": 18.9645,
|
| 26770 |
+
"step": 38150
|
| 26771 |
+
},
|
| 26772 |
+
{
|
| 26773 |
+
"epoch": 0.70814689037502,
|
| 26774 |
+
"grad_norm": 33.875,
|
| 26775 |
+
"learning_rate": 9.889352080462147e-06,
|
| 26776 |
+
"loss": 19.4386,
|
| 26777 |
+
"step": 38160
|
| 26778 |
+
},
|
| 26779 |
+
{
|
| 26780 |
+
"epoch": 0.7083324634594999,
|
| 26781 |
+
"grad_norm": 33.625,
|
| 26782 |
+
"learning_rate": 9.889323084676105e-06,
|
| 26783 |
+
"loss": 19.4483,
|
| 26784 |
+
"step": 38170
|
| 26785 |
+
},
|
| 26786 |
+
{
|
| 26787 |
+
"epoch": 0.7085180365439797,
|
| 26788 |
+
"grad_norm": 37.375,
|
| 26789 |
+
"learning_rate": 9.889294088890062e-06,
|
| 26790 |
+
"loss": 19.0382,
|
| 26791 |
+
"step": 38180
|
| 26792 |
+
},
|
| 26793 |
+
{
|
| 26794 |
+
"epoch": 0.7087036096284595,
|
| 26795 |
+
"grad_norm": 35.90625,
|
| 26796 |
+
"learning_rate": 9.88926509310402e-06,
|
| 26797 |
+
"loss": 19.0689,
|
| 26798 |
+
"step": 38190
|
| 26799 |
+
},
|
| 26800 |
+
{
|
| 26801 |
+
"epoch": 0.7088891827129393,
|
| 26802 |
+
"grad_norm": 37.3125,
|
| 26803 |
+
"learning_rate": 9.889236097317979e-06,
|
| 26804 |
+
"loss": 19.3899,
|
| 26805 |
+
"step": 38200
|
| 26806 |
+
},
|
| 26807 |
+
{
|
| 26808 |
+
"epoch": 0.7090747557974192,
|
| 26809 |
+
"grad_norm": 34.09375,
|
| 26810 |
+
"learning_rate": 9.889207101531935e-06,
|
| 26811 |
+
"loss": 19.3655,
|
| 26812 |
+
"step": 38210
|
| 26813 |
+
},
|
| 26814 |
+
{
|
| 26815 |
+
"epoch": 0.7092603288818989,
|
| 26816 |
+
"grad_norm": 35.875,
|
| 26817 |
+
"learning_rate": 9.889178105745892e-06,
|
| 26818 |
+
"loss": 19.9081,
|
| 26819 |
+
"step": 38220
|
| 26820 |
+
},
|
| 26821 |
+
{
|
| 26822 |
+
"epoch": 0.7094459019663788,
|
| 26823 |
+
"grad_norm": 36.625,
|
| 26824 |
+
"learning_rate": 9.889149109959851e-06,
|
| 26825 |
+
"loss": 19.4929,
|
| 26826 |
+
"step": 38230
|
| 26827 |
+
},
|
| 26828 |
+
{
|
| 26829 |
+
"epoch": 0.7096314750508587,
|
| 26830 |
+
"grad_norm": 34.0,
|
| 26831 |
+
"learning_rate": 9.889120114173808e-06,
|
| 26832 |
+
"loss": 19.5349,
|
| 26833 |
+
"step": 38240
|
| 26834 |
+
},
|
| 26835 |
+
{
|
| 26836 |
+
"epoch": 0.7098170481353384,
|
| 26837 |
+
"grad_norm": 35.75,
|
| 26838 |
+
"learning_rate": 9.889091118387766e-06,
|
| 26839 |
+
"loss": 19.5176,
|
| 26840 |
+
"step": 38250
|
| 26841 |
+
},
|
| 26842 |
+
{
|
| 26843 |
+
"epoch": 0.7100026212198183,
|
| 26844 |
+
"grad_norm": 34.96875,
|
| 26845 |
+
"learning_rate": 9.889062122601723e-06,
|
| 26846 |
+
"loss": 19.3043,
|
| 26847 |
+
"step": 38260
|
| 26848 |
+
},
|
| 26849 |
+
{
|
| 26850 |
+
"epoch": 0.7101881943042981,
|
| 26851 |
+
"grad_norm": 35.34375,
|
| 26852 |
+
"learning_rate": 9.88903312681568e-06,
|
| 26853 |
+
"loss": 20.086,
|
| 26854 |
+
"step": 38270
|
| 26855 |
+
},
|
| 26856 |
+
{
|
| 26857 |
+
"epoch": 0.7103737673887779,
|
| 26858 |
+
"grad_norm": 36.03125,
|
| 26859 |
+
"learning_rate": 9.889004131029638e-06,
|
| 26860 |
+
"loss": 19.7873,
|
| 26861 |
+
"step": 38280
|
| 26862 |
+
},
|
| 26863 |
+
{
|
| 26864 |
+
"epoch": 0.7105593404732578,
|
| 26865 |
+
"grad_norm": 36.09375,
|
| 26866 |
+
"learning_rate": 9.888975135243595e-06,
|
| 26867 |
+
"loss": 19.4605,
|
| 26868 |
+
"step": 38290
|
| 26869 |
+
},
|
| 26870 |
+
{
|
| 26871 |
+
"epoch": 0.7107449135577376,
|
| 26872 |
+
"grad_norm": 35.53125,
|
| 26873 |
+
"learning_rate": 9.888946139457553e-06,
|
| 26874 |
+
"loss": 19.2167,
|
| 26875 |
+
"step": 38300
|
| 26876 |
+
},
|
| 26877 |
+
{
|
| 26878 |
+
"epoch": 0.7109304866422174,
|
| 26879 |
+
"grad_norm": 38.0625,
|
| 26880 |
+
"learning_rate": 9.888917143671512e-06,
|
| 26881 |
+
"loss": 19.7455,
|
| 26882 |
+
"step": 38310
|
| 26883 |
+
},
|
| 26884 |
+
{
|
| 26885 |
+
"epoch": 0.7111160597266972,
|
| 26886 |
+
"grad_norm": 36.90625,
|
| 26887 |
+
"learning_rate": 9.888888147885468e-06,
|
| 26888 |
+
"loss": 19.2411,
|
| 26889 |
+
"step": 38320
|
| 26890 |
+
},
|
| 26891 |
+
{
|
| 26892 |
+
"epoch": 0.7113016328111771,
|
| 26893 |
+
"grad_norm": 35.65625,
|
| 26894 |
+
"learning_rate": 9.888859152099425e-06,
|
| 26895 |
+
"loss": 19.6434,
|
| 26896 |
+
"step": 38330
|
| 26897 |
+
},
|
| 26898 |
+
{
|
| 26899 |
+
"epoch": 0.711487205895657,
|
| 26900 |
+
"grad_norm": 35.5,
|
| 26901 |
+
"learning_rate": 9.888830156313384e-06,
|
| 26902 |
+
"loss": 19.2703,
|
| 26903 |
+
"step": 38340
|
| 26904 |
+
},
|
| 26905 |
+
{
|
| 26906 |
+
"epoch": 0.7116727789801367,
|
| 26907 |
+
"grad_norm": 36.0,
|
| 26908 |
+
"learning_rate": 9.888801160527342e-06,
|
| 26909 |
+
"loss": 19.8718,
|
| 26910 |
+
"step": 38350
|
| 26911 |
+
},
|
| 26912 |
+
{
|
| 26913 |
+
"epoch": 0.7118583520646166,
|
| 26914 |
+
"grad_norm": 35.875,
|
| 26915 |
+
"learning_rate": 9.888772164741299e-06,
|
| 26916 |
+
"loss": 19.3253,
|
| 26917 |
+
"step": 38360
|
| 26918 |
+
},
|
| 26919 |
+
{
|
| 26920 |
+
"epoch": 0.7120439251490964,
|
| 26921 |
+
"grad_norm": 36.5,
|
| 26922 |
+
"learning_rate": 9.888743168955256e-06,
|
| 26923 |
+
"loss": 19.5194,
|
| 26924 |
+
"step": 38370
|
| 26925 |
+
},
|
| 26926 |
+
{
|
| 26927 |
+
"epoch": 0.7122294982335762,
|
| 26928 |
+
"grad_norm": 35.28125,
|
| 26929 |
+
"learning_rate": 9.888714173169214e-06,
|
| 26930 |
+
"loss": 19.5886,
|
| 26931 |
+
"step": 38380
|
| 26932 |
+
},
|
| 26933 |
+
{
|
| 26934 |
+
"epoch": 0.712415071318056,
|
| 26935 |
+
"grad_norm": 35.53125,
|
| 26936 |
+
"learning_rate": 9.888685177383171e-06,
|
| 26937 |
+
"loss": 19.4309,
|
| 26938 |
+
"step": 38390
|
| 26939 |
+
},
|
| 26940 |
+
{
|
| 26941 |
+
"epoch": 0.7126006444025359,
|
| 26942 |
+
"grad_norm": 36.0,
|
| 26943 |
+
"learning_rate": 9.888656181597129e-06,
|
| 26944 |
+
"loss": 19.3463,
|
| 26945 |
+
"step": 38400
|
| 26946 |
+
},
|
| 26947 |
+
{
|
| 26948 |
+
"epoch": 0.7127862174870156,
|
| 26949 |
+
"grad_norm": 36.53125,
|
| 26950 |
+
"learning_rate": 9.888627185811088e-06,
|
| 26951 |
+
"loss": 19.6,
|
| 26952 |
+
"step": 38410
|
| 26953 |
+
},
|
| 26954 |
+
{
|
| 26955 |
+
"epoch": 0.7129717905714955,
|
| 26956 |
+
"grad_norm": 35.5,
|
| 26957 |
+
"learning_rate": 9.888598190025045e-06,
|
| 26958 |
+
"loss": 19.4615,
|
| 26959 |
+
"step": 38420
|
| 26960 |
+
},
|
| 26961 |
+
{
|
| 26962 |
+
"epoch": 0.7131573636559754,
|
| 26963 |
+
"grad_norm": 35.0,
|
| 26964 |
+
"learning_rate": 9.888569194239001e-06,
|
| 26965 |
+
"loss": 19.4513,
|
| 26966 |
+
"step": 38430
|
| 26967 |
+
},
|
| 26968 |
+
{
|
| 26969 |
+
"epoch": 0.7133429367404551,
|
| 26970 |
+
"grad_norm": 37.0625,
|
| 26971 |
+
"learning_rate": 9.88854019845296e-06,
|
| 26972 |
+
"loss": 19.8098,
|
| 26973 |
+
"step": 38440
|
| 26974 |
+
},
|
| 26975 |
+
{
|
| 26976 |
+
"epoch": 0.713528509824935,
|
| 26977 |
+
"grad_norm": 37.25,
|
| 26978 |
+
"learning_rate": 9.888511202666917e-06,
|
| 26979 |
+
"loss": 19.3014,
|
| 26980 |
+
"step": 38450
|
| 26981 |
+
},
|
| 26982 |
+
{
|
| 26983 |
+
"epoch": 0.7137140829094148,
|
| 26984 |
+
"grad_norm": 33.9375,
|
| 26985 |
+
"learning_rate": 9.888482206880875e-06,
|
| 26986 |
+
"loss": 19.7513,
|
| 26987 |
+
"step": 38460
|
| 26988 |
+
},
|
| 26989 |
+
{
|
| 26990 |
+
"epoch": 0.7138996559938946,
|
| 26991 |
+
"grad_norm": 34.625,
|
| 26992 |
+
"learning_rate": 9.888453211094832e-06,
|
| 26993 |
+
"loss": 19.3369,
|
| 26994 |
+
"step": 38470
|
| 26995 |
+
},
|
| 26996 |
+
{
|
| 26997 |
+
"epoch": 0.7140852290783745,
|
| 26998 |
+
"grad_norm": 38.0625,
|
| 26999 |
+
"learning_rate": 9.88842421530879e-06,
|
| 27000 |
+
"loss": 19.3678,
|
| 27001 |
+
"step": 38480
|
| 27002 |
+
},
|
| 27003 |
+
{
|
| 27004 |
+
"epoch": 0.7142708021628543,
|
| 27005 |
+
"grad_norm": 36.375,
|
| 27006 |
+
"learning_rate": 9.888395219522747e-06,
|
| 27007 |
+
"loss": 19.3446,
|
| 27008 |
+
"step": 38490
|
| 27009 |
+
},
|
| 27010 |
+
{
|
| 27011 |
+
"epoch": 0.7144563752473341,
|
| 27012 |
+
"grad_norm": 35.15625,
|
| 27013 |
+
"learning_rate": 9.888366223736704e-06,
|
| 27014 |
+
"loss": 19.1207,
|
| 27015 |
+
"step": 38500
|
| 27016 |
+
},
|
| 27017 |
+
{
|
| 27018 |
+
"epoch": 0.7146419483318139,
|
| 27019 |
+
"grad_norm": 35.28125,
|
| 27020 |
+
"learning_rate": 9.888337227950663e-06,
|
| 27021 |
+
"loss": 19.5949,
|
| 27022 |
+
"step": 38510
|
| 27023 |
+
},
|
| 27024 |
+
{
|
| 27025 |
+
"epoch": 0.7148275214162938,
|
| 27026 |
+
"grad_norm": 35.1875,
|
| 27027 |
+
"learning_rate": 9.888308232164621e-06,
|
| 27028 |
+
"loss": 19.5172,
|
| 27029 |
+
"step": 38520
|
| 27030 |
+
},
|
| 27031 |
+
{
|
| 27032 |
+
"epoch": 0.7150130945007737,
|
| 27033 |
+
"grad_norm": 36.71875,
|
| 27034 |
+
"learning_rate": 9.888279236378578e-06,
|
| 27035 |
+
"loss": 19.6522,
|
| 27036 |
+
"step": 38530
|
| 27037 |
+
},
|
| 27038 |
+
{
|
| 27039 |
+
"epoch": 0.7151986675852534,
|
| 27040 |
+
"grad_norm": 34.03125,
|
| 27041 |
+
"learning_rate": 9.888250240592536e-06,
|
| 27042 |
+
"loss": 19.8201,
|
| 27043 |
+
"step": 38540
|
| 27044 |
+
},
|
| 27045 |
+
{
|
| 27046 |
+
"epoch": 0.7153842406697333,
|
| 27047 |
+
"grad_norm": 37.0625,
|
| 27048 |
+
"learning_rate": 9.888221244806493e-06,
|
| 27049 |
+
"loss": 19.2714,
|
| 27050 |
+
"step": 38550
|
| 27051 |
+
},
|
| 27052 |
+
{
|
| 27053 |
+
"epoch": 0.7155698137542131,
|
| 27054 |
+
"grad_norm": 36.78125,
|
| 27055 |
+
"learning_rate": 9.88819224902045e-06,
|
| 27056 |
+
"loss": 19.2433,
|
| 27057 |
+
"step": 38560
|
| 27058 |
+
},
|
| 27059 |
+
{
|
| 27060 |
+
"epoch": 0.7157553868386929,
|
| 27061 |
+
"grad_norm": 35.78125,
|
| 27062 |
+
"learning_rate": 9.888163253234408e-06,
|
| 27063 |
+
"loss": 19.69,
|
| 27064 |
+
"step": 38570
|
| 27065 |
+
},
|
| 27066 |
+
{
|
| 27067 |
+
"epoch": 0.7159409599231727,
|
| 27068 |
+
"grad_norm": 34.5625,
|
| 27069 |
+
"learning_rate": 9.888134257448367e-06,
|
| 27070 |
+
"loss": 19.7708,
|
| 27071 |
+
"step": 38580
|
| 27072 |
+
},
|
| 27073 |
+
{
|
| 27074 |
+
"epoch": 0.7161265330076526,
|
| 27075 |
+
"grad_norm": 34.71875,
|
| 27076 |
+
"learning_rate": 9.888105261662323e-06,
|
| 27077 |
+
"loss": 19.347,
|
| 27078 |
+
"step": 38590
|
| 27079 |
+
},
|
| 27080 |
+
{
|
| 27081 |
+
"epoch": 0.7163121060921324,
|
| 27082 |
+
"grad_norm": 33.90625,
|
| 27083 |
+
"learning_rate": 9.88807626587628e-06,
|
| 27084 |
+
"loss": 19.3059,
|
| 27085 |
+
"step": 38600
|
| 27086 |
+
},
|
| 27087 |
+
{
|
| 27088 |
+
"epoch": 0.7164976791766122,
|
| 27089 |
+
"grad_norm": 37.15625,
|
| 27090 |
+
"learning_rate": 9.88804727009024e-06,
|
| 27091 |
+
"loss": 19.8677,
|
| 27092 |
+
"step": 38610
|
| 27093 |
+
},
|
| 27094 |
+
{
|
| 27095 |
+
"epoch": 0.7166832522610921,
|
| 27096 |
+
"grad_norm": 37.9375,
|
| 27097 |
+
"learning_rate": 9.888018274304197e-06,
|
| 27098 |
+
"loss": 19.4334,
|
| 27099 |
+
"step": 38620
|
| 27100 |
+
},
|
| 27101 |
+
{
|
| 27102 |
+
"epoch": 0.7168688253455718,
|
| 27103 |
+
"grad_norm": 35.25,
|
| 27104 |
+
"learning_rate": 9.887989278518154e-06,
|
| 27105 |
+
"loss": 19.2413,
|
| 27106 |
+
"step": 38630
|
| 27107 |
+
},
|
| 27108 |
+
{
|
| 27109 |
+
"epoch": 0.7170543984300517,
|
| 27110 |
+
"grad_norm": 37.40625,
|
| 27111 |
+
"learning_rate": 9.887960282732111e-06,
|
| 27112 |
+
"loss": 19.6072,
|
| 27113 |
+
"step": 38640
|
| 27114 |
+
},
|
| 27115 |
+
{
|
| 27116 |
+
"epoch": 0.7172399715145316,
|
| 27117 |
+
"grad_norm": 35.625,
|
| 27118 |
+
"learning_rate": 9.887931286946069e-06,
|
| 27119 |
+
"loss": 19.5617,
|
| 27120 |
+
"step": 38650
|
| 27121 |
+
},
|
| 27122 |
+
{
|
| 27123 |
+
"epoch": 0.7174255445990113,
|
| 27124 |
+
"grad_norm": 35.8125,
|
| 27125 |
+
"learning_rate": 9.887902291160026e-06,
|
| 27126 |
+
"loss": 19.2944,
|
| 27127 |
+
"step": 38660
|
| 27128 |
+
},
|
| 27129 |
+
{
|
| 27130 |
+
"epoch": 0.7176111176834912,
|
| 27131 |
+
"grad_norm": 36.78125,
|
| 27132 |
+
"learning_rate": 9.887873295373984e-06,
|
| 27133 |
+
"loss": 19.6959,
|
| 27134 |
+
"step": 38670
|
| 27135 |
+
},
|
| 27136 |
+
{
|
| 27137 |
+
"epoch": 0.717796690767971,
|
| 27138 |
+
"grad_norm": 37.4375,
|
| 27139 |
+
"learning_rate": 9.887844299587943e-06,
|
| 27140 |
+
"loss": 19.6264,
|
| 27141 |
+
"step": 38680
|
| 27142 |
+
},
|
| 27143 |
+
{
|
| 27144 |
+
"epoch": 0.7179822638524508,
|
| 27145 |
+
"grad_norm": 34.40625,
|
| 27146 |
+
"learning_rate": 9.8878153038019e-06,
|
| 27147 |
+
"loss": 19.4896,
|
| 27148 |
+
"step": 38690
|
| 27149 |
+
},
|
| 27150 |
+
{
|
| 27151 |
+
"epoch": 0.7181678369369306,
|
| 27152 |
+
"grad_norm": 34.53125,
|
| 27153 |
+
"learning_rate": 9.887786308015856e-06,
|
| 27154 |
+
"loss": 19.5352,
|
| 27155 |
+
"step": 38700
|
| 27156 |
+
},
|
| 27157 |
+
{
|
| 27158 |
+
"epoch": 0.7183534100214105,
|
| 27159 |
+
"grad_norm": 37.84375,
|
| 27160 |
+
"learning_rate": 9.887757312229815e-06,
|
| 27161 |
+
"loss": 19.5476,
|
| 27162 |
+
"step": 38710
|
| 27163 |
+
},
|
| 27164 |
+
{
|
| 27165 |
+
"epoch": 0.7185389831058904,
|
| 27166 |
+
"grad_norm": 37.0625,
|
| 27167 |
+
"learning_rate": 9.887728316443772e-06,
|
| 27168 |
+
"loss": 19.3343,
|
| 27169 |
+
"step": 38720
|
| 27170 |
+
},
|
| 27171 |
+
{
|
| 27172 |
+
"epoch": 0.7187245561903701,
|
| 27173 |
+
"grad_norm": 33.96875,
|
| 27174 |
+
"learning_rate": 9.88769932065773e-06,
|
| 27175 |
+
"loss": 19.3166,
|
| 27176 |
+
"step": 38730
|
| 27177 |
+
},
|
| 27178 |
+
{
|
| 27179 |
+
"epoch": 0.71891012927485,
|
| 27180 |
+
"grad_norm": 36.34375,
|
| 27181 |
+
"learning_rate": 9.887670324871687e-06,
|
| 27182 |
+
"loss": 19.3764,
|
| 27183 |
+
"step": 38740
|
| 27184 |
+
},
|
| 27185 |
+
{
|
| 27186 |
+
"epoch": 0.7190957023593298,
|
| 27187 |
+
"grad_norm": 36.65625,
|
| 27188 |
+
"learning_rate": 9.887641329085645e-06,
|
| 27189 |
+
"loss": 19.6944,
|
| 27190 |
+
"step": 38750
|
| 27191 |
+
},
|
| 27192 |
+
{
|
| 27193 |
+
"epoch": 0.7192812754438096,
|
| 27194 |
+
"grad_norm": 35.46875,
|
| 27195 |
+
"learning_rate": 9.887612333299602e-06,
|
| 27196 |
+
"loss": 19.4655,
|
| 27197 |
+
"step": 38760
|
| 27198 |
+
},
|
| 27199 |
+
{
|
| 27200 |
+
"epoch": 0.7194668485282895,
|
| 27201 |
+
"grad_norm": 36.375,
|
| 27202 |
+
"learning_rate": 9.88758333751356e-06,
|
| 27203 |
+
"loss": 19.8738,
|
| 27204 |
+
"step": 38770
|
| 27205 |
+
},
|
| 27206 |
+
{
|
| 27207 |
+
"epoch": 0.7196524216127693,
|
| 27208 |
+
"grad_norm": 35.0,
|
| 27209 |
+
"learning_rate": 9.887554341727517e-06,
|
| 27210 |
+
"loss": 19.594,
|
| 27211 |
+
"step": 38780
|
| 27212 |
+
},
|
| 27213 |
+
{
|
| 27214 |
+
"epoch": 0.7198379946972491,
|
| 27215 |
+
"grad_norm": 37.875,
|
| 27216 |
+
"learning_rate": 9.887525345941476e-06,
|
| 27217 |
+
"loss": 19.2177,
|
| 27218 |
+
"step": 38790
|
| 27219 |
+
},
|
| 27220 |
+
{
|
| 27221 |
+
"epoch": 0.7200235677817289,
|
| 27222 |
+
"grad_norm": 34.21875,
|
| 27223 |
+
"learning_rate": 9.887496350155432e-06,
|
| 27224 |
+
"loss": 19.5523,
|
| 27225 |
+
"step": 38800
|
| 27226 |
+
},
|
| 27227 |
+
{
|
| 27228 |
+
"epoch": 0.7202091408662088,
|
| 27229 |
+
"grad_norm": 37.1875,
|
| 27230 |
+
"learning_rate": 9.88746735436939e-06,
|
| 27231 |
+
"loss": 19.6112,
|
| 27232 |
+
"step": 38810
|
| 27233 |
+
},
|
| 27234 |
+
{
|
| 27235 |
+
"epoch": 0.7203947139506885,
|
| 27236 |
+
"grad_norm": 33.9375,
|
| 27237 |
+
"learning_rate": 9.887438358583348e-06,
|
| 27238 |
+
"loss": 19.4369,
|
| 27239 |
+
"step": 38820
|
| 27240 |
+
},
|
| 27241 |
+
{
|
| 27242 |
+
"epoch": 0.7205802870351684,
|
| 27243 |
+
"grad_norm": 38.28125,
|
| 27244 |
+
"learning_rate": 9.887409362797306e-06,
|
| 27245 |
+
"loss": 19.5935,
|
| 27246 |
+
"step": 38830
|
| 27247 |
+
},
|
| 27248 |
+
{
|
| 27249 |
+
"epoch": 0.7207658601196483,
|
| 27250 |
+
"grad_norm": 35.0,
|
| 27251 |
+
"learning_rate": 9.887380367011263e-06,
|
| 27252 |
+
"loss": 19.5737,
|
| 27253 |
+
"step": 38840
|
| 27254 |
+
},
|
| 27255 |
+
{
|
| 27256 |
+
"epoch": 0.720951433204128,
|
| 27257 |
+
"grad_norm": 35.96875,
|
| 27258 |
+
"learning_rate": 9.88735137122522e-06,
|
| 27259 |
+
"loss": 19.4008,
|
| 27260 |
+
"step": 38850
|
| 27261 |
+
},
|
| 27262 |
+
{
|
| 27263 |
+
"epoch": 0.7211370062886079,
|
| 27264 |
+
"grad_norm": 36.125,
|
| 27265 |
+
"learning_rate": 9.887322375439178e-06,
|
| 27266 |
+
"loss": 19.1818,
|
| 27267 |
+
"step": 38860
|
| 27268 |
+
},
|
| 27269 |
+
{
|
| 27270 |
+
"epoch": 0.7213225793730877,
|
| 27271 |
+
"grad_norm": 34.03125,
|
| 27272 |
+
"learning_rate": 9.887293379653135e-06,
|
| 27273 |
+
"loss": 19.4143,
|
| 27274 |
+
"step": 38870
|
| 27275 |
+
},
|
| 27276 |
+
{
|
| 27277 |
+
"epoch": 0.7215081524575676,
|
| 27278 |
+
"grad_norm": 34.46875,
|
| 27279 |
+
"learning_rate": 9.887264383867093e-06,
|
| 27280 |
+
"loss": 19.5025,
|
| 27281 |
+
"step": 38880
|
| 27282 |
+
},
|
| 27283 |
+
{
|
| 27284 |
+
"epoch": 0.7216937255420474,
|
| 27285 |
+
"grad_norm": 37.75,
|
| 27286 |
+
"learning_rate": 9.887235388081052e-06,
|
| 27287 |
+
"loss": 19.3903,
|
| 27288 |
+
"step": 38890
|
| 27289 |
+
},
|
| 27290 |
+
{
|
| 27291 |
+
"epoch": 0.7218792986265272,
|
| 27292 |
+
"grad_norm": 36.15625,
|
| 27293 |
+
"learning_rate": 9.88720639229501e-06,
|
| 27294 |
+
"loss": 19.2237,
|
| 27295 |
+
"step": 38900
|
| 27296 |
+
},
|
| 27297 |
+
{
|
| 27298 |
+
"epoch": 0.7220648717110071,
|
| 27299 |
+
"grad_norm": 35.21875,
|
| 27300 |
+
"learning_rate": 9.887177396508965e-06,
|
| 27301 |
+
"loss": 19.0859,
|
| 27302 |
+
"step": 38910
|
| 27303 |
+
},
|
| 27304 |
+
{
|
| 27305 |
+
"epoch": 0.7222504447954868,
|
| 27306 |
+
"grad_norm": 36.5,
|
| 27307 |
+
"learning_rate": 9.887148400722924e-06,
|
| 27308 |
+
"loss": 19.3678,
|
| 27309 |
+
"step": 38920
|
| 27310 |
+
},
|
| 27311 |
+
{
|
| 27312 |
+
"epoch": 0.7224360178799667,
|
| 27313 |
+
"grad_norm": 36.9375,
|
| 27314 |
+
"learning_rate": 9.887119404936881e-06,
|
| 27315 |
+
"loss": 19.5165,
|
| 27316 |
+
"step": 38930
|
| 27317 |
+
},
|
| 27318 |
+
{
|
| 27319 |
+
"epoch": 0.7226215909644466,
|
| 27320 |
+
"grad_norm": 35.40625,
|
| 27321 |
+
"learning_rate": 9.887090409150839e-06,
|
| 27322 |
+
"loss": 19.8087,
|
| 27323 |
+
"step": 38940
|
| 27324 |
+
},
|
| 27325 |
+
{
|
| 27326 |
+
"epoch": 0.7228071640489263,
|
| 27327 |
+
"grad_norm": 36.6875,
|
| 27328 |
+
"learning_rate": 9.887061413364796e-06,
|
| 27329 |
+
"loss": 19.7642,
|
| 27330 |
+
"step": 38950
|
| 27331 |
+
},
|
| 27332 |
+
{
|
| 27333 |
+
"epoch": 0.7229927371334062,
|
| 27334 |
+
"grad_norm": 35.09375,
|
| 27335 |
+
"learning_rate": 9.887032417578754e-06,
|
| 27336 |
+
"loss": 19.627,
|
| 27337 |
+
"step": 38960
|
| 27338 |
+
},
|
| 27339 |
+
{
|
| 27340 |
+
"epoch": 0.723178310217886,
|
| 27341 |
+
"grad_norm": 35.84375,
|
| 27342 |
+
"learning_rate": 9.887003421792711e-06,
|
| 27343 |
+
"loss": 19.519,
|
| 27344 |
+
"step": 38970
|
| 27345 |
+
},
|
| 27346 |
+
{
|
| 27347 |
+
"epoch": 0.7233638833023658,
|
| 27348 |
+
"grad_norm": 35.625,
|
| 27349 |
+
"learning_rate": 9.886974426006668e-06,
|
| 27350 |
+
"loss": 19.3384,
|
| 27351 |
+
"step": 38980
|
| 27352 |
+
},
|
| 27353 |
+
{
|
| 27354 |
+
"epoch": 0.7235494563868456,
|
| 27355 |
+
"grad_norm": 35.15625,
|
| 27356 |
+
"learning_rate": 9.886945430220628e-06,
|
| 27357 |
+
"loss": 20.0005,
|
| 27358 |
+
"step": 38990
|
| 27359 |
+
},
|
| 27360 |
+
{
|
| 27361 |
+
"epoch": 0.7237350294713255,
|
| 27362 |
+
"grad_norm": 38.75,
|
| 27363 |
+
"learning_rate": 9.886916434434585e-06,
|
| 27364 |
+
"loss": 19.7862,
|
| 27365 |
+
"step": 39000
|
| 27366 |
+
},
|
| 27367 |
+
{
|
| 27368 |
+
"epoch": 0.7239206025558053,
|
| 27369 |
+
"grad_norm": 35.1875,
|
| 27370 |
+
"learning_rate": 9.886887438648542e-06,
|
| 27371 |
+
"loss": 19.2984,
|
| 27372 |
+
"step": 39010
|
| 27373 |
+
},
|
| 27374 |
+
{
|
| 27375 |
+
"epoch": 0.7241061756402851,
|
| 27376 |
+
"grad_norm": 37.0625,
|
| 27377 |
+
"learning_rate": 9.8868584428625e-06,
|
| 27378 |
+
"loss": 19.7105,
|
| 27379 |
+
"step": 39020
|
| 27380 |
+
},
|
| 27381 |
+
{
|
| 27382 |
+
"epoch": 0.724291748724765,
|
| 27383 |
+
"grad_norm": 35.71875,
|
| 27384 |
+
"learning_rate": 9.886829447076457e-06,
|
| 27385 |
+
"loss": 19.3705,
|
| 27386 |
+
"step": 39030
|
| 27387 |
+
},
|
| 27388 |
+
{
|
| 27389 |
+
"epoch": 0.7244773218092447,
|
| 27390 |
+
"grad_norm": 35.0,
|
| 27391 |
+
"learning_rate": 9.886800451290415e-06,
|
| 27392 |
+
"loss": 19.2526,
|
| 27393 |
+
"step": 39040
|
| 27394 |
+
},
|
| 27395 |
+
{
|
| 27396 |
+
"epoch": 0.7246628948937246,
|
| 27397 |
+
"grad_norm": 34.84375,
|
| 27398 |
+
"learning_rate": 9.886771455504372e-06,
|
| 27399 |
+
"loss": 19.6478,
|
| 27400 |
+
"step": 39050
|
| 27401 |
+
},
|
| 27402 |
+
{
|
| 27403 |
+
"epoch": 0.7248484679782045,
|
| 27404 |
+
"grad_norm": 36.53125,
|
| 27405 |
+
"learning_rate": 9.886742459718331e-06,
|
| 27406 |
+
"loss": 19.5643,
|
| 27407 |
+
"step": 39060
|
| 27408 |
+
},
|
| 27409 |
+
{
|
| 27410 |
+
"epoch": 0.7250340410626843,
|
| 27411 |
+
"grad_norm": 39.0625,
|
| 27412 |
+
"learning_rate": 9.886713463932287e-06,
|
| 27413 |
+
"loss": 19.0415,
|
| 27414 |
+
"step": 39070
|
| 27415 |
+
},
|
| 27416 |
+
{
|
| 27417 |
+
"epoch": 0.7252196141471641,
|
| 27418 |
+
"grad_norm": 33.71875,
|
| 27419 |
+
"learning_rate": 9.886684468146244e-06,
|
| 27420 |
+
"loss": 19.0971,
|
| 27421 |
+
"step": 39080
|
| 27422 |
+
},
|
| 27423 |
+
{
|
| 27424 |
+
"epoch": 0.7254051872316439,
|
| 27425 |
+
"grad_norm": 34.90625,
|
| 27426 |
+
"learning_rate": 9.886655472360203e-06,
|
| 27427 |
+
"loss": 19.5161,
|
| 27428 |
+
"step": 39090
|
| 27429 |
+
},
|
| 27430 |
+
{
|
| 27431 |
+
"epoch": 0.7255907603161238,
|
| 27432 |
+
"grad_norm": 36.0625,
|
| 27433 |
+
"learning_rate": 9.88662647657416e-06,
|
| 27434 |
+
"loss": 19.7788,
|
| 27435 |
+
"step": 39100
|
| 27436 |
+
},
|
| 27437 |
+
{
|
| 27438 |
+
"epoch": 0.7257763334006035,
|
| 27439 |
+
"grad_norm": 35.90625,
|
| 27440 |
+
"learning_rate": 9.886597480788118e-06,
|
| 27441 |
+
"loss": 19.4104,
|
| 27442 |
+
"step": 39110
|
| 27443 |
+
},
|
| 27444 |
+
{
|
| 27445 |
+
"epoch": 0.7259619064850834,
|
| 27446 |
+
"grad_norm": 35.5,
|
| 27447 |
+
"learning_rate": 9.886568485002075e-06,
|
| 27448 |
+
"loss": 19.7646,
|
| 27449 |
+
"step": 39120
|
| 27450 |
+
},
|
| 27451 |
+
{
|
| 27452 |
+
"epoch": 0.7261474795695633,
|
| 27453 |
+
"grad_norm": 35.625,
|
| 27454 |
+
"learning_rate": 9.886539489216033e-06,
|
| 27455 |
+
"loss": 19.6163,
|
| 27456 |
+
"step": 39130
|
| 27457 |
+
},
|
| 27458 |
+
{
|
| 27459 |
+
"epoch": 0.726333052654043,
|
| 27460 |
+
"grad_norm": 36.34375,
|
| 27461 |
+
"learning_rate": 9.88651049342999e-06,
|
| 27462 |
+
"loss": 19.5463,
|
| 27463 |
+
"step": 39140
|
| 27464 |
+
},
|
| 27465 |
+
{
|
| 27466 |
+
"epoch": 0.7265186257385229,
|
| 27467 |
+
"grad_norm": 35.5625,
|
| 27468 |
+
"learning_rate": 9.886481497643948e-06,
|
| 27469 |
+
"loss": 19.2152,
|
| 27470 |
+
"step": 39150
|
| 27471 |
+
},
|
| 27472 |
+
{
|
| 27473 |
+
"epoch": 0.7267041988230027,
|
| 27474 |
+
"grad_norm": 34.84375,
|
| 27475 |
+
"learning_rate": 9.886452501857907e-06,
|
| 27476 |
+
"loss": 19.5463,
|
| 27477 |
+
"step": 39160
|
| 27478 |
+
},
|
| 27479 |
+
{
|
| 27480 |
+
"epoch": 0.7268897719074825,
|
| 27481 |
+
"grad_norm": 34.65625,
|
| 27482 |
+
"learning_rate": 9.886423506071864e-06,
|
| 27483 |
+
"loss": 19.6092,
|
| 27484 |
+
"step": 39170
|
| 27485 |
+
},
|
| 27486 |
+
{
|
| 27487 |
+
"epoch": 0.7270753449919624,
|
| 27488 |
+
"grad_norm": 34.59375,
|
| 27489 |
+
"learning_rate": 9.88639451028582e-06,
|
| 27490 |
+
"loss": 19.4284,
|
| 27491 |
+
"step": 39180
|
| 27492 |
+
},
|
| 27493 |
+
{
|
| 27494 |
+
"epoch": 0.7272609180764422,
|
| 27495 |
+
"grad_norm": 34.8125,
|
| 27496 |
+
"learning_rate": 9.886365514499779e-06,
|
| 27497 |
+
"loss": 19.3534,
|
| 27498 |
+
"step": 39190
|
| 27499 |
+
},
|
| 27500 |
+
{
|
| 27501 |
+
"epoch": 0.727446491160922,
|
| 27502 |
+
"grad_norm": 36.0,
|
| 27503 |
+
"learning_rate": 9.886336518713736e-06,
|
| 27504 |
+
"loss": 19.8356,
|
| 27505 |
+
"step": 39200
|
| 27506 |
+
},
|
| 27507 |
+
{
|
| 27508 |
+
"epoch": 0.7276320642454018,
|
| 27509 |
+
"grad_norm": 35.28125,
|
| 27510 |
+
"learning_rate": 9.886307522927694e-06,
|
| 27511 |
+
"loss": 19.2134,
|
| 27512 |
+
"step": 39210
|
| 27513 |
+
},
|
| 27514 |
+
{
|
| 27515 |
+
"epoch": 0.7278176373298817,
|
| 27516 |
+
"grad_norm": 37.53125,
|
| 27517 |
+
"learning_rate": 9.886278527141651e-06,
|
| 27518 |
+
"loss": 19.133,
|
| 27519 |
+
"step": 39220
|
| 27520 |
+
},
|
| 27521 |
+
{
|
| 27522 |
+
"epoch": 0.7280032104143614,
|
| 27523 |
+
"grad_norm": 35.375,
|
| 27524 |
+
"learning_rate": 9.886249531355609e-06,
|
| 27525 |
+
"loss": 19.0139,
|
| 27526 |
+
"step": 39230
|
| 27527 |
+
},
|
| 27528 |
+
{
|
| 27529 |
+
"epoch": 0.7281887834988413,
|
| 27530 |
+
"grad_norm": 37.25,
|
| 27531 |
+
"learning_rate": 9.886220535569566e-06,
|
| 27532 |
+
"loss": 19.1991,
|
| 27533 |
+
"step": 39240
|
| 27534 |
+
},
|
| 27535 |
+
{
|
| 27536 |
+
"epoch": 0.7283743565833212,
|
| 27537 |
+
"grad_norm": 34.84375,
|
| 27538 |
+
"learning_rate": 9.886191539783523e-06,
|
| 27539 |
+
"loss": 19.4178,
|
| 27540 |
+
"step": 39250
|
| 27541 |
+
},
|
| 27542 |
+
{
|
| 27543 |
+
"epoch": 0.728559929667801,
|
| 27544 |
+
"grad_norm": 35.28125,
|
| 27545 |
+
"learning_rate": 9.886162543997483e-06,
|
| 27546 |
+
"loss": 19.7294,
|
| 27547 |
+
"step": 39260
|
| 27548 |
+
},
|
| 27549 |
+
{
|
| 27550 |
+
"epoch": 0.7287455027522808,
|
| 27551 |
+
"grad_norm": 36.90625,
|
| 27552 |
+
"learning_rate": 9.88613354821144e-06,
|
| 27553 |
+
"loss": 19.5717,
|
| 27554 |
+
"step": 39270
|
| 27555 |
+
},
|
| 27556 |
+
{
|
| 27557 |
+
"epoch": 0.7289310758367606,
|
| 27558 |
+
"grad_norm": 36.5,
|
| 27559 |
+
"learning_rate": 9.886104552425397e-06,
|
| 27560 |
+
"loss": 19.4598,
|
| 27561 |
+
"step": 39280
|
| 27562 |
+
},
|
| 27563 |
+
{
|
| 27564 |
+
"epoch": 0.7291166489212405,
|
| 27565 |
+
"grad_norm": 36.96875,
|
| 27566 |
+
"learning_rate": 9.886075556639355e-06,
|
| 27567 |
+
"loss": 19.3378,
|
| 27568 |
+
"step": 39290
|
| 27569 |
+
},
|
| 27570 |
+
{
|
| 27571 |
+
"epoch": 0.7293022220057203,
|
| 27572 |
+
"grad_norm": 37.21875,
|
| 27573 |
+
"learning_rate": 9.886046560853312e-06,
|
| 27574 |
+
"loss": 19.6847,
|
| 27575 |
+
"step": 39300
|
| 27576 |
+
},
|
| 27577 |
+
{
|
| 27578 |
+
"epoch": 0.7294877950902001,
|
| 27579 |
+
"grad_norm": 35.3125,
|
| 27580 |
+
"learning_rate": 9.88601756506727e-06,
|
| 27581 |
+
"loss": 19.398,
|
| 27582 |
+
"step": 39310
|
| 27583 |
+
},
|
| 27584 |
+
{
|
| 27585 |
+
"epoch": 0.72967336817468,
|
| 27586 |
+
"grad_norm": 35.375,
|
| 27587 |
+
"learning_rate": 9.885988569281227e-06,
|
| 27588 |
+
"loss": 19.577,
|
| 27589 |
+
"step": 39320
|
| 27590 |
+
},
|
| 27591 |
+
{
|
| 27592 |
+
"epoch": 0.7298589412591597,
|
| 27593 |
+
"grad_norm": 34.8125,
|
| 27594 |
+
"learning_rate": 9.885959573495184e-06,
|
| 27595 |
+
"loss": 19.5882,
|
| 27596 |
+
"step": 39330
|
| 27597 |
+
},
|
| 27598 |
+
{
|
| 27599 |
+
"epoch": 0.7300445143436396,
|
| 27600 |
+
"grad_norm": 34.71875,
|
| 27601 |
+
"learning_rate": 9.885930577709142e-06,
|
| 27602 |
+
"loss": 19.9676,
|
| 27603 |
+
"step": 39340
|
| 27604 |
+
},
|
| 27605 |
+
{
|
| 27606 |
+
"epoch": 0.7302300874281195,
|
| 27607 |
+
"grad_norm": 34.1875,
|
| 27608 |
+
"learning_rate": 9.8859015819231e-06,
|
| 27609 |
+
"loss": 18.9682,
|
| 27610 |
+
"step": 39350
|
| 27611 |
+
},
|
| 27612 |
+
{
|
| 27613 |
+
"epoch": 0.7304156605125992,
|
| 27614 |
+
"grad_norm": 35.0625,
|
| 27615 |
+
"learning_rate": 9.885872586137057e-06,
|
| 27616 |
+
"loss": 18.9754,
|
| 27617 |
+
"step": 39360
|
| 27618 |
+
},
|
| 27619 |
+
{
|
| 27620 |
+
"epoch": 0.7306012335970791,
|
| 27621 |
+
"grad_norm": 36.59375,
|
| 27622 |
+
"learning_rate": 9.885843590351016e-06,
|
| 27623 |
+
"loss": 19.5236,
|
| 27624 |
+
"step": 39370
|
| 27625 |
+
},
|
| 27626 |
+
{
|
| 27627 |
+
"epoch": 0.7307868066815589,
|
| 27628 |
+
"grad_norm": 35.34375,
|
| 27629 |
+
"learning_rate": 9.885814594564973e-06,
|
| 27630 |
+
"loss": 19.5679,
|
| 27631 |
+
"step": 39380
|
| 27632 |
+
},
|
| 27633 |
+
{
|
| 27634 |
+
"epoch": 0.7309723797660387,
|
| 27635 |
+
"grad_norm": 34.25,
|
| 27636 |
+
"learning_rate": 9.88578559877893e-06,
|
| 27637 |
+
"loss": 19.5254,
|
| 27638 |
+
"step": 39390
|
| 27639 |
+
},
|
| 27640 |
+
{
|
| 27641 |
+
"epoch": 0.7311579528505185,
|
| 27642 |
+
"grad_norm": 35.0,
|
| 27643 |
+
"learning_rate": 9.885756602992888e-06,
|
| 27644 |
+
"loss": 19.4094,
|
| 27645 |
+
"step": 39400
|
| 27646 |
+
},
|
| 27647 |
+
{
|
| 27648 |
+
"epoch": 0.7313435259349984,
|
| 27649 |
+
"grad_norm": 35.40625,
|
| 27650 |
+
"learning_rate": 9.885727607206845e-06,
|
| 27651 |
+
"loss": 19.3836,
|
| 27652 |
+
"step": 39410
|
| 27653 |
+
},
|
| 27654 |
+
{
|
| 27655 |
+
"epoch": 0.7315290990194783,
|
| 27656 |
+
"grad_norm": 36.3125,
|
| 27657 |
+
"learning_rate": 9.885698611420803e-06,
|
| 27658 |
+
"loss": 19.7532,
|
| 27659 |
+
"step": 39420
|
| 27660 |
+
},
|
| 27661 |
+
{
|
| 27662 |
+
"epoch": 0.731714672103958,
|
| 27663 |
+
"grad_norm": 36.78125,
|
| 27664 |
+
"learning_rate": 9.88566961563476e-06,
|
| 27665 |
+
"loss": 19.3568,
|
| 27666 |
+
"step": 39430
|
| 27667 |
+
},
|
| 27668 |
+
{
|
| 27669 |
+
"epoch": 0.7319002451884379,
|
| 27670 |
+
"grad_norm": 34.90625,
|
| 27671 |
+
"learning_rate": 9.88564061984872e-06,
|
| 27672 |
+
"loss": 19.3629,
|
| 27673 |
+
"step": 39440
|
| 27674 |
+
},
|
| 27675 |
+
{
|
| 27676 |
+
"epoch": 0.7320858182729177,
|
| 27677 |
+
"grad_norm": 35.875,
|
| 27678 |
+
"learning_rate": 9.885611624062675e-06,
|
| 27679 |
+
"loss": 19.2368,
|
| 27680 |
+
"step": 39450
|
| 27681 |
+
},
|
| 27682 |
+
{
|
| 27683 |
+
"epoch": 0.7322713913573975,
|
| 27684 |
+
"grad_norm": 33.84375,
|
| 27685 |
+
"learning_rate": 9.885582628276632e-06,
|
| 27686 |
+
"loss": 19.4356,
|
| 27687 |
+
"step": 39460
|
| 27688 |
+
},
|
| 27689 |
+
{
|
| 27690 |
+
"epoch": 0.7324569644418774,
|
| 27691 |
+
"grad_norm": 34.8125,
|
| 27692 |
+
"learning_rate": 9.885553632490592e-06,
|
| 27693 |
+
"loss": 19.1461,
|
| 27694 |
+
"step": 39470
|
| 27695 |
+
},
|
| 27696 |
+
{
|
| 27697 |
+
"epoch": 0.7326425375263572,
|
| 27698 |
+
"grad_norm": 36.34375,
|
| 27699 |
+
"learning_rate": 9.885524636704549e-06,
|
| 27700 |
+
"loss": 19.5205,
|
| 27701 |
+
"step": 39480
|
| 27702 |
+
},
|
| 27703 |
+
{
|
| 27704 |
+
"epoch": 0.732828110610837,
|
| 27705 |
+
"grad_norm": 36.5,
|
| 27706 |
+
"learning_rate": 9.885495640918506e-06,
|
| 27707 |
+
"loss": 19.2334,
|
| 27708 |
+
"step": 39490
|
| 27709 |
+
},
|
| 27710 |
+
{
|
| 27711 |
+
"epoch": 0.7330136836953168,
|
| 27712 |
+
"grad_norm": 34.40625,
|
| 27713 |
+
"learning_rate": 9.885466645132464e-06,
|
| 27714 |
+
"loss": 19.2247,
|
| 27715 |
+
"step": 39500
|
| 27716 |
+
},
|
| 27717 |
+
{
|
| 27718 |
+
"epoch": 0.7331992567797967,
|
| 27719 |
+
"grad_norm": 36.90625,
|
| 27720 |
+
"learning_rate": 9.885437649346421e-06,
|
| 27721 |
+
"loss": 19.5144,
|
| 27722 |
+
"step": 39510
|
| 27723 |
+
},
|
| 27724 |
+
{
|
| 27725 |
+
"epoch": 0.7333848298642764,
|
| 27726 |
+
"grad_norm": 36.25,
|
| 27727 |
+
"learning_rate": 9.885408653560379e-06,
|
| 27728 |
+
"loss": 18.9981,
|
| 27729 |
+
"step": 39520
|
| 27730 |
+
},
|
| 27731 |
+
{
|
| 27732 |
+
"epoch": 0.7335704029487563,
|
| 27733 |
+
"grad_norm": 35.4375,
|
| 27734 |
+
"learning_rate": 9.885379657774336e-06,
|
| 27735 |
+
"loss": 19.2192,
|
| 27736 |
+
"step": 39530
|
| 27737 |
+
},
|
| 27738 |
+
{
|
| 27739 |
+
"epoch": 0.7337559760332362,
|
| 27740 |
+
"grad_norm": 36.8125,
|
| 27741 |
+
"learning_rate": 9.885350661988295e-06,
|
| 27742 |
+
"loss": 19.2174,
|
| 27743 |
+
"step": 39540
|
| 27744 |
+
},
|
| 27745 |
+
{
|
| 27746 |
+
"epoch": 0.7339415491177159,
|
| 27747 |
+
"grad_norm": 33.6875,
|
| 27748 |
+
"learning_rate": 9.88532166620225e-06,
|
| 27749 |
+
"loss": 19.6817,
|
| 27750 |
+
"step": 39550
|
| 27751 |
+
},
|
| 27752 |
+
{
|
| 27753 |
+
"epoch": 0.7341271222021958,
|
| 27754 |
+
"grad_norm": 36.875,
|
| 27755 |
+
"learning_rate": 9.885292670416208e-06,
|
| 27756 |
+
"loss": 18.8573,
|
| 27757 |
+
"step": 39560
|
| 27758 |
+
},
|
| 27759 |
+
{
|
| 27760 |
+
"epoch": 0.7343126952866756,
|
| 27761 |
+
"grad_norm": 37.375,
|
| 27762 |
+
"learning_rate": 9.885263674630167e-06,
|
| 27763 |
+
"loss": 19.7325,
|
| 27764 |
+
"step": 39570
|
| 27765 |
+
},
|
| 27766 |
+
{
|
| 27767 |
+
"epoch": 0.7344982683711554,
|
| 27768 |
+
"grad_norm": 37.125,
|
| 27769 |
+
"learning_rate": 9.885234678844125e-06,
|
| 27770 |
+
"loss": 19.4219,
|
| 27771 |
+
"step": 39580
|
| 27772 |
+
},
|
| 27773 |
+
{
|
| 27774 |
+
"epoch": 0.7346838414556353,
|
| 27775 |
+
"grad_norm": 35.46875,
|
| 27776 |
+
"learning_rate": 9.885205683058082e-06,
|
| 27777 |
+
"loss": 19.1926,
|
| 27778 |
+
"step": 39590
|
| 27779 |
+
},
|
| 27780 |
+
{
|
| 27781 |
+
"epoch": 0.7348694145401151,
|
| 27782 |
+
"grad_norm": 35.71875,
|
| 27783 |
+
"learning_rate": 9.88517668727204e-06,
|
| 27784 |
+
"loss": 19.064,
|
| 27785 |
+
"step": 39600
|
| 27786 |
+
},
|
| 27787 |
+
{
|
| 27788 |
+
"epoch": 0.735054987624595,
|
| 27789 |
+
"grad_norm": 36.34375,
|
| 27790 |
+
"learning_rate": 9.885147691485997e-06,
|
| 27791 |
+
"loss": 19.0486,
|
| 27792 |
+
"step": 39610
|
| 27793 |
+
},
|
| 27794 |
+
{
|
| 27795 |
+
"epoch": 0.7352405607090747,
|
| 27796 |
+
"grad_norm": 36.03125,
|
| 27797 |
+
"learning_rate": 9.885118695699954e-06,
|
| 27798 |
+
"loss": 19.7487,
|
| 27799 |
+
"step": 39620
|
| 27800 |
+
},
|
| 27801 |
+
{
|
| 27802 |
+
"epoch": 0.7354261337935546,
|
| 27803 |
+
"grad_norm": 36.25,
|
| 27804 |
+
"learning_rate": 9.885089699913912e-06,
|
| 27805 |
+
"loss": 19.5051,
|
| 27806 |
+
"step": 39630
|
| 27807 |
+
},
|
| 27808 |
+
{
|
| 27809 |
+
"epoch": 0.7356117068780345,
|
| 27810 |
+
"grad_norm": 36.375,
|
| 27811 |
+
"learning_rate": 9.88506070412787e-06,
|
| 27812 |
+
"loss": 19.2603,
|
| 27813 |
+
"step": 39640
|
| 27814 |
+
},
|
| 27815 |
+
{
|
| 27816 |
+
"epoch": 0.7357972799625142,
|
| 27817 |
+
"grad_norm": 35.5625,
|
| 27818 |
+
"learning_rate": 9.885031708341828e-06,
|
| 27819 |
+
"loss": 19.957,
|
| 27820 |
+
"step": 39650
|
| 27821 |
+
},
|
| 27822 |
+
{
|
| 27823 |
+
"epoch": 0.7359828530469941,
|
| 27824 |
+
"grad_norm": 36.34375,
|
| 27825 |
+
"learning_rate": 9.885002712555784e-06,
|
| 27826 |
+
"loss": 19.3855,
|
| 27827 |
+
"step": 39660
|
| 27828 |
+
},
|
| 27829 |
+
{
|
| 27830 |
+
"epoch": 0.7361684261314739,
|
| 27831 |
+
"grad_norm": 34.1875,
|
| 27832 |
+
"learning_rate": 9.884973716769743e-06,
|
| 27833 |
+
"loss": 19.6183,
|
| 27834 |
+
"step": 39670
|
| 27835 |
+
},
|
| 27836 |
+
{
|
| 27837 |
+
"epoch": 0.7363539992159537,
|
| 27838 |
+
"grad_norm": 37.125,
|
| 27839 |
+
"learning_rate": 9.8849447209837e-06,
|
| 27840 |
+
"loss": 19.1751,
|
| 27841 |
+
"step": 39680
|
| 27842 |
+
},
|
| 27843 |
+
{
|
| 27844 |
+
"epoch": 0.7365395723004335,
|
| 27845 |
+
"grad_norm": 35.15625,
|
| 27846 |
+
"learning_rate": 9.884915725197658e-06,
|
| 27847 |
+
"loss": 19.4365,
|
| 27848 |
+
"step": 39690
|
| 27849 |
+
},
|
| 27850 |
+
{
|
| 27851 |
+
"epoch": 0.7367251453849134,
|
| 27852 |
+
"grad_norm": 37.28125,
|
| 27853 |
+
"learning_rate": 9.884886729411615e-06,
|
| 27854 |
+
"loss": 19.867,
|
| 27855 |
+
"step": 39700
|
| 27856 |
+
},
|
| 27857 |
+
{
|
| 27858 |
+
"epoch": 0.7369107184693932,
|
| 27859 |
+
"grad_norm": 34.4375,
|
| 27860 |
+
"learning_rate": 9.884857733625574e-06,
|
| 27861 |
+
"loss": 19.4728,
|
| 27862 |
+
"step": 39710
|
| 27863 |
+
},
|
| 27864 |
+
{
|
| 27865 |
+
"epoch": 0.737096291553873,
|
| 27866 |
+
"grad_norm": 36.9375,
|
| 27867 |
+
"learning_rate": 9.88482873783953e-06,
|
| 27868 |
+
"loss": 19.0685,
|
| 27869 |
+
"step": 39720
|
| 27870 |
+
},
|
| 27871 |
+
{
|
| 27872 |
+
"epoch": 0.7372818646383529,
|
| 27873 |
+
"grad_norm": 35.34375,
|
| 27874 |
+
"learning_rate": 9.884799742053487e-06,
|
| 27875 |
+
"loss": 19.7118,
|
| 27876 |
+
"step": 39730
|
| 27877 |
+
},
|
| 27878 |
+
{
|
| 27879 |
+
"epoch": 0.7374674377228326,
|
| 27880 |
+
"grad_norm": 36.5,
|
| 27881 |
+
"learning_rate": 9.884770746267447e-06,
|
| 27882 |
+
"loss": 19.6547,
|
| 27883 |
+
"step": 39740
|
| 27884 |
+
},
|
| 27885 |
+
{
|
| 27886 |
+
"epoch": 0.7376530108073125,
|
| 27887 |
+
"grad_norm": 34.84375,
|
| 27888 |
+
"learning_rate": 9.884741750481404e-06,
|
| 27889 |
+
"loss": 19.3336,
|
| 27890 |
+
"step": 39750
|
| 27891 |
+
},
|
| 27892 |
+
{
|
| 27893 |
+
"epoch": 0.7378385838917924,
|
| 27894 |
+
"grad_norm": 33.96875,
|
| 27895 |
+
"learning_rate": 9.884712754695361e-06,
|
| 27896 |
+
"loss": 19.3172,
|
| 27897 |
+
"step": 39760
|
| 27898 |
+
},
|
| 27899 |
+
{
|
| 27900 |
+
"epoch": 0.7380241569762722,
|
| 27901 |
+
"grad_norm": 35.15625,
|
| 27902 |
+
"learning_rate": 9.884683758909319e-06,
|
| 27903 |
+
"loss": 19.659,
|
| 27904 |
+
"step": 39770
|
| 27905 |
+
},
|
| 27906 |
+
{
|
| 27907 |
+
"epoch": 0.738209730060752,
|
| 27908 |
+
"grad_norm": 34.75,
|
| 27909 |
+
"learning_rate": 9.884654763123276e-06,
|
| 27910 |
+
"loss": 19.564,
|
| 27911 |
+
"step": 39780
|
| 27912 |
+
},
|
| 27913 |
+
{
|
| 27914 |
+
"epoch": 0.7383953031452318,
|
| 27915 |
+
"grad_norm": 36.9375,
|
| 27916 |
+
"learning_rate": 9.884625767337234e-06,
|
| 27917 |
+
"loss": 19.4628,
|
| 27918 |
+
"step": 39790
|
| 27919 |
+
},
|
| 27920 |
+
{
|
| 27921 |
+
"epoch": 0.7385808762297117,
|
| 27922 |
+
"grad_norm": 35.28125,
|
| 27923 |
+
"learning_rate": 9.884596771551191e-06,
|
| 27924 |
+
"loss": 19.3677,
|
| 27925 |
+
"step": 39800
|
| 27926 |
+
},
|
| 27927 |
+
{
|
| 27928 |
+
"epoch": 0.7387664493141914,
|
| 27929 |
+
"grad_norm": 36.03125,
|
| 27930 |
+
"learning_rate": 9.884567775765148e-06,
|
| 27931 |
+
"loss": 19.5696,
|
| 27932 |
+
"step": 39810
|
| 27933 |
+
},
|
| 27934 |
+
{
|
| 27935 |
+
"epoch": 0.7389520223986713,
|
| 27936 |
+
"grad_norm": 34.5,
|
| 27937 |
+
"learning_rate": 9.884538779979106e-06,
|
| 27938 |
+
"loss": 19.2716,
|
| 27939 |
+
"step": 39820
|
| 27940 |
+
},
|
| 27941 |
+
{
|
| 27942 |
+
"epoch": 0.7391375954831512,
|
| 27943 |
+
"grad_norm": 35.53125,
|
| 27944 |
+
"learning_rate": 9.884509784193063e-06,
|
| 27945 |
+
"loss": 19.2286,
|
| 27946 |
+
"step": 39830
|
| 27947 |
+
},
|
| 27948 |
+
{
|
| 27949 |
+
"epoch": 0.7393231685676309,
|
| 27950 |
+
"grad_norm": 35.46875,
|
| 27951 |
+
"learning_rate": 9.884480788407022e-06,
|
| 27952 |
+
"loss": 19.1631,
|
| 27953 |
+
"step": 39840
|
| 27954 |
+
},
|
| 27955 |
+
{
|
| 27956 |
+
"epoch": 0.7395087416521108,
|
| 27957 |
+
"grad_norm": 35.96875,
|
| 27958 |
+
"learning_rate": 9.88445179262098e-06,
|
| 27959 |
+
"loss": 19.4365,
|
| 27960 |
+
"step": 39850
|
| 27961 |
+
},
|
| 27962 |
+
{
|
| 27963 |
+
"epoch": 0.7396943147365906,
|
| 27964 |
+
"grad_norm": 35.96875,
|
| 27965 |
+
"learning_rate": 9.884422796834937e-06,
|
| 27966 |
+
"loss": 19.5979,
|
| 27967 |
+
"step": 39860
|
| 27968 |
+
},
|
| 27969 |
+
{
|
| 27970 |
+
"epoch": 0.7398798878210704,
|
| 27971 |
+
"grad_norm": 36.9375,
|
| 27972 |
+
"learning_rate": 9.884393801048895e-06,
|
| 27973 |
+
"loss": 19.6595,
|
| 27974 |
+
"step": 39870
|
| 27975 |
+
},
|
| 27976 |
+
{
|
| 27977 |
+
"epoch": 0.7400654609055503,
|
| 27978 |
+
"grad_norm": 37.28125,
|
| 27979 |
+
"learning_rate": 9.884364805262852e-06,
|
| 27980 |
+
"loss": 19.4817,
|
| 27981 |
+
"step": 39880
|
| 27982 |
+
},
|
| 27983 |
+
{
|
| 27984 |
+
"epoch": 0.7402510339900301,
|
| 27985 |
+
"grad_norm": 36.03125,
|
| 27986 |
+
"learning_rate": 9.88433580947681e-06,
|
| 27987 |
+
"loss": 19.3088,
|
| 27988 |
+
"step": 39890
|
| 27989 |
+
},
|
| 27990 |
+
{
|
| 27991 |
+
"epoch": 0.7404366070745099,
|
| 27992 |
+
"grad_norm": 34.75,
|
| 27993 |
+
"learning_rate": 9.884306813690767e-06,
|
| 27994 |
+
"loss": 19.4918,
|
| 27995 |
+
"step": 39900
|
| 27996 |
+
},
|
| 27997 |
+
{
|
| 27998 |
+
"epoch": 0.7406221801589897,
|
| 27999 |
+
"grad_norm": 35.5,
|
| 28000 |
+
"learning_rate": 9.884277817904724e-06,
|
| 28001 |
+
"loss": 19.435,
|
| 28002 |
+
"step": 39910
|
| 28003 |
+
},
|
| 28004 |
+
{
|
| 28005 |
+
"epoch": 0.7408077532434696,
|
| 28006 |
+
"grad_norm": 33.78125,
|
| 28007 |
+
"learning_rate": 9.884248822118683e-06,
|
| 28008 |
+
"loss": 19.268,
|
| 28009 |
+
"step": 39920
|
| 28010 |
+
},
|
| 28011 |
+
{
|
| 28012 |
+
"epoch": 0.7409933263279493,
|
| 28013 |
+
"grad_norm": 37.5,
|
| 28014 |
+
"learning_rate": 9.884219826332639e-06,
|
| 28015 |
+
"loss": 19.885,
|
| 28016 |
+
"step": 39930
|
| 28017 |
+
},
|
| 28018 |
+
{
|
| 28019 |
+
"epoch": 0.7411788994124292,
|
| 28020 |
+
"grad_norm": 34.9375,
|
| 28021 |
+
"learning_rate": 9.884190830546596e-06,
|
| 28022 |
+
"loss": 19.6043,
|
| 28023 |
+
"step": 39940
|
| 28024 |
+
},
|
| 28025 |
+
{
|
| 28026 |
+
"epoch": 0.7413644724969091,
|
| 28027 |
+
"grad_norm": 36.65625,
|
| 28028 |
+
"learning_rate": 9.884161834760556e-06,
|
| 28029 |
+
"loss": 18.9762,
|
| 28030 |
+
"step": 39950
|
| 28031 |
+
},
|
| 28032 |
+
{
|
| 28033 |
+
"epoch": 0.7415500455813889,
|
| 28034 |
+
"grad_norm": 35.5,
|
| 28035 |
+
"learning_rate": 9.884132838974513e-06,
|
| 28036 |
+
"loss": 19.6159,
|
| 28037 |
+
"step": 39960
|
| 28038 |
+
},
|
| 28039 |
+
{
|
| 28040 |
+
"epoch": 0.7417356186658687,
|
| 28041 |
+
"grad_norm": 35.53125,
|
| 28042 |
+
"learning_rate": 9.88410384318847e-06,
|
| 28043 |
+
"loss": 19.5883,
|
| 28044 |
+
"step": 39970
|
| 28045 |
+
},
|
| 28046 |
+
{
|
| 28047 |
+
"epoch": 0.7419211917503485,
|
| 28048 |
+
"grad_norm": 35.21875,
|
| 28049 |
+
"learning_rate": 9.884074847402428e-06,
|
| 28050 |
+
"loss": 19.4468,
|
| 28051 |
+
"step": 39980
|
| 28052 |
+
},
|
| 28053 |
+
{
|
| 28054 |
+
"epoch": 0.7421067648348284,
|
| 28055 |
+
"grad_norm": 35.34375,
|
| 28056 |
+
"learning_rate": 9.884045851616385e-06,
|
| 28057 |
+
"loss": 19.7061,
|
| 28058 |
+
"step": 39990
|
| 28059 |
+
},
|
| 28060 |
+
{
|
| 28061 |
+
"epoch": 0.7422923379193082,
|
| 28062 |
+
"grad_norm": 37.5625,
|
| 28063 |
+
"learning_rate": 9.884016855830343e-06,
|
| 28064 |
+
"loss": 19.2119,
|
| 28065 |
+
"step": 40000
|
| 28066 |
+
},
|
| 28067 |
+
{
|
| 28068 |
+
"epoch": 0.7422923379193082,
|
| 28069 |
+
"eval_loss": 2.4196152687072754,
|
| 28070 |
+
"eval_runtime": 454.1953,
|
| 28071 |
+
"eval_samples_per_second": 3197.12,
|
| 28072 |
+
"eval_steps_per_second": 49.956,
|
| 28073 |
+
"step": 40000
|
| 28074 |
}
|
| 28075 |
],
|
| 28076 |
"logging_steps": 10,
|
|
|
|
| 28090 |
"attributes": {}
|
| 28091 |
}
|
| 28092 |
},
|
| 28093 |
+
"total_flos": 6.982091036164096e+18,
|
| 28094 |
"train_batch_size": 8,
|
| 28095 |
"trial_name": null,
|
| 28096 |
"trial_params": null
|