Training in progress, step 15600, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 340808816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a65f82baf85a00b5aab3be8707a0f09652ca90e9b6194cd3eb531d8dab5c3d5d
|
| 3 |
size 340808816
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 173247691
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00af2275574f38287d5f3e5810f003963cdce86c0b04da66a8460653b5c17c9c
|
| 3 |
size 173247691
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed3753ab7977739b8eda494dd72defae5750f7283141b11a8f562160ba4c1a23
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc9ebb824de4eaa12b7fc04a188ecf8fbf5f00d91334b27237c6c010a50c579a
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -107108,6 +107108,2106 @@
|
|
| 107108 |
"learning_rate": 5.031506787414764e-07,
|
| 107109 |
"loss": 0.7385158538818359,
|
| 107110 |
"step": 15300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107111 |
}
|
| 107112 |
],
|
| 107113 |
"logging_steps": 1,
|
|
@@ -107127,7 +109227,7 @@
|
|
| 107127 |
"attributes": {}
|
| 107128 |
}
|
| 107129 |
},
|
| 107130 |
-
"total_flos": 4.
|
| 107131 |
"train_batch_size": 8,
|
| 107132 |
"trial_name": null,
|
| 107133 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 19.141104294478527,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 15600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 107108 |
"learning_rate": 5.031506787414764e-07,
|
| 107109 |
"loss": 0.7385158538818359,
|
| 107110 |
"step": 15300
|
| 107111 |
+
},
|
| 107112 |
+
{
|
| 107113 |
+
"epoch": 18.774233128834357,
|
| 107114 |
+
"grad_norm": 0.23346325755119324,
|
| 107115 |
+
"learning_rate": 5.021492621404694e-07,
|
| 107116 |
+
"loss": 0.5686599016189575,
|
| 107117 |
+
"step": 15301
|
| 107118 |
+
},
|
| 107119 |
+
{
|
| 107120 |
+
"epoch": 18.775460122699386,
|
| 107121 |
+
"grad_norm": 0.30095958709716797,
|
| 107122 |
+
"learning_rate": 5.011488329779602e-07,
|
| 107123 |
+
"loss": 0.7451412677764893,
|
| 107124 |
+
"step": 15302
|
| 107125 |
+
},
|
| 107126 |
+
{
|
| 107127 |
+
"epoch": 18.776687116564418,
|
| 107128 |
+
"grad_norm": 0.26715636253356934,
|
| 107129 |
+
"learning_rate": 5.001493912942662e-07,
|
| 107130 |
+
"loss": 0.7142741680145264,
|
| 107131 |
+
"step": 15303
|
| 107132 |
+
},
|
| 107133 |
+
{
|
| 107134 |
+
"epoch": 18.77791411042945,
|
| 107135 |
+
"grad_norm": 0.2585250735282898,
|
| 107136 |
+
"learning_rate": 4.991509371296748e-07,
|
| 107137 |
+
"loss": 0.6067298650741577,
|
| 107138 |
+
"step": 15304
|
| 107139 |
+
},
|
| 107140 |
+
{
|
| 107141 |
+
"epoch": 18.779141104294478,
|
| 107142 |
+
"grad_norm": 0.22537195682525635,
|
| 107143 |
+
"learning_rate": 4.981534705244317e-07,
|
| 107144 |
+
"loss": 0.34548866748809814,
|
| 107145 |
+
"step": 15305
|
| 107146 |
+
},
|
| 107147 |
+
{
|
| 107148 |
+
"epoch": 18.78036809815951,
|
| 107149 |
+
"grad_norm": 0.2498675286769867,
|
| 107150 |
+
"learning_rate": 4.971569915187379e-07,
|
| 107151 |
+
"loss": 0.5628390312194824,
|
| 107152 |
+
"step": 15306
|
| 107153 |
+
},
|
| 107154 |
+
{
|
| 107155 |
+
"epoch": 18.78159509202454,
|
| 107156 |
+
"grad_norm": 0.28589847683906555,
|
| 107157 |
+
"learning_rate": 4.961615001527642e-07,
|
| 107158 |
+
"loss": 0.6326263546943665,
|
| 107159 |
+
"step": 15307
|
| 107160 |
+
},
|
| 107161 |
+
{
|
| 107162 |
+
"epoch": 18.78282208588957,
|
| 107163 |
+
"grad_norm": 0.2657351791858673,
|
| 107164 |
+
"learning_rate": 4.951669964666312e-07,
|
| 107165 |
+
"loss": 0.4277014136314392,
|
| 107166 |
+
"step": 15308
|
| 107167 |
+
},
|
| 107168 |
+
{
|
| 107169 |
+
"epoch": 18.784049079754602,
|
| 107170 |
+
"grad_norm": 0.2827761471271515,
|
| 107171 |
+
"learning_rate": 4.941734805004289e-07,
|
| 107172 |
+
"loss": 0.8661507368087769,
|
| 107173 |
+
"step": 15309
|
| 107174 |
+
},
|
| 107175 |
+
{
|
| 107176 |
+
"epoch": 18.78527607361963,
|
| 107177 |
+
"grad_norm": 0.29832637310028076,
|
| 107178 |
+
"learning_rate": 4.931809522942005e-07,
|
| 107179 |
+
"loss": 0.698661208152771,
|
| 107180 |
+
"step": 15310
|
| 107181 |
+
},
|
| 107182 |
+
{
|
| 107183 |
+
"epoch": 18.786503067484663,
|
| 107184 |
+
"grad_norm": 0.25398361682891846,
|
| 107185 |
+
"learning_rate": 4.9218941188795e-07,
|
| 107186 |
+
"loss": 0.5759528279304504,
|
| 107187 |
+
"step": 15311
|
| 107188 |
+
},
|
| 107189 |
+
{
|
| 107190 |
+
"epoch": 18.787730061349695,
|
| 107191 |
+
"grad_norm": 0.24753601849079132,
|
| 107192 |
+
"learning_rate": 4.911988593216455e-07,
|
| 107193 |
+
"loss": 0.6113766431808472,
|
| 107194 |
+
"step": 15312
|
| 107195 |
+
},
|
| 107196 |
+
{
|
| 107197 |
+
"epoch": 18.788957055214723,
|
| 107198 |
+
"grad_norm": 0.2627313435077667,
|
| 107199 |
+
"learning_rate": 4.902092946352188e-07,
|
| 107200 |
+
"loss": 0.8472735285758972,
|
| 107201 |
+
"step": 15313
|
| 107202 |
+
},
|
| 107203 |
+
{
|
| 107204 |
+
"epoch": 18.790184049079755,
|
| 107205 |
+
"grad_norm": 0.24352765083312988,
|
| 107206 |
+
"learning_rate": 4.892207178685465e-07,
|
| 107207 |
+
"loss": 0.5096030235290527,
|
| 107208 |
+
"step": 15314
|
| 107209 |
+
},
|
| 107210 |
+
{
|
| 107211 |
+
"epoch": 18.791411042944784,
|
| 107212 |
+
"grad_norm": 0.25554129481315613,
|
| 107213 |
+
"learning_rate": 4.8823312906148e-07,
|
| 107214 |
+
"loss": 0.48963814973831177,
|
| 107215 |
+
"step": 15315
|
| 107216 |
+
},
|
| 107217 |
+
{
|
| 107218 |
+
"epoch": 18.792638036809816,
|
| 107219 |
+
"grad_norm": 0.2661280632019043,
|
| 107220 |
+
"learning_rate": 4.872465282538291e-07,
|
| 107221 |
+
"loss": 0.7096434235572815,
|
| 107222 |
+
"step": 15316
|
| 107223 |
+
},
|
| 107224 |
+
{
|
| 107225 |
+
"epoch": 18.793865030674848,
|
| 107226 |
+
"grad_norm": 0.2772790193557739,
|
| 107227 |
+
"learning_rate": 4.862609154853564e-07,
|
| 107228 |
+
"loss": 0.3714704215526581,
|
| 107229 |
+
"step": 15317
|
| 107230 |
+
},
|
| 107231 |
+
{
|
| 107232 |
+
"epoch": 18.795092024539876,
|
| 107233 |
+
"grad_norm": 0.2562744617462158,
|
| 107234 |
+
"learning_rate": 4.852762907957941e-07,
|
| 107235 |
+
"loss": 0.5489447116851807,
|
| 107236 |
+
"step": 15318
|
| 107237 |
+
},
|
| 107238 |
+
{
|
| 107239 |
+
"epoch": 18.79631901840491,
|
| 107240 |
+
"grad_norm": 0.2723841071128845,
|
| 107241 |
+
"learning_rate": 4.842926542248216e-07,
|
| 107242 |
+
"loss": 0.7756527066230774,
|
| 107243 |
+
"step": 15319
|
| 107244 |
+
},
|
| 107245 |
+
{
|
| 107246 |
+
"epoch": 18.79754601226994,
|
| 107247 |
+
"grad_norm": 0.2623818516731262,
|
| 107248 |
+
"learning_rate": 4.833100058120932e-07,
|
| 107249 |
+
"loss": 0.6893080472946167,
|
| 107250 |
+
"step": 15320
|
| 107251 |
+
},
|
| 107252 |
+
{
|
| 107253 |
+
"epoch": 18.79877300613497,
|
| 107254 |
+
"grad_norm": 0.29040658473968506,
|
| 107255 |
+
"learning_rate": 4.823283455972133e-07,
|
| 107256 |
+
"loss": 0.6836308240890503,
|
| 107257 |
+
"step": 15321
|
| 107258 |
+
},
|
| 107259 |
+
{
|
| 107260 |
+
"epoch": 18.8,
|
| 107261 |
+
"grad_norm": 0.2843216359615326,
|
| 107262 |
+
"learning_rate": 4.813476736197531e-07,
|
| 107263 |
+
"loss": 0.4764214754104614,
|
| 107264 |
+
"step": 15322
|
| 107265 |
+
},
|
| 107266 |
+
{
|
| 107267 |
+
"epoch": 18.80122699386503,
|
| 107268 |
+
"grad_norm": 0.2815638482570648,
|
| 107269 |
+
"learning_rate": 4.803679899192392e-07,
|
| 107270 |
+
"loss": 0.6921877861022949,
|
| 107271 |
+
"step": 15323
|
| 107272 |
+
},
|
| 107273 |
+
{
|
| 107274 |
+
"epoch": 18.80245398773006,
|
| 107275 |
+
"grad_norm": 0.27609118819236755,
|
| 107276 |
+
"learning_rate": 4.793892945351597e-07,
|
| 107277 |
+
"loss": 0.45421868562698364,
|
| 107278 |
+
"step": 15324
|
| 107279 |
+
},
|
| 107280 |
+
{
|
| 107281 |
+
"epoch": 18.803680981595093,
|
| 107282 |
+
"grad_norm": 0.2480902224779129,
|
| 107283 |
+
"learning_rate": 4.784115875069606e-07,
|
| 107284 |
+
"loss": 0.6213089227676392,
|
| 107285 |
+
"step": 15325
|
| 107286 |
+
},
|
| 107287 |
+
{
|
| 107288 |
+
"epoch": 18.80490797546012,
|
| 107289 |
+
"grad_norm": 0.3058662414550781,
|
| 107290 |
+
"learning_rate": 4.774348688740548e-07,
|
| 107291 |
+
"loss": 0.6734089851379395,
|
| 107292 |
+
"step": 15326
|
| 107293 |
+
},
|
| 107294 |
+
{
|
| 107295 |
+
"epoch": 18.806134969325154,
|
| 107296 |
+
"grad_norm": 0.2818622291088104,
|
| 107297 |
+
"learning_rate": 4.764591386758055e-07,
|
| 107298 |
+
"loss": 0.7876219749450684,
|
| 107299 |
+
"step": 15327
|
| 107300 |
+
},
|
| 107301 |
+
{
|
| 107302 |
+
"epoch": 18.807361963190186,
|
| 107303 |
+
"grad_norm": 0.2581932842731476,
|
| 107304 |
+
"learning_rate": 4.754843969515477e-07,
|
| 107305 |
+
"loss": 0.5179560780525208,
|
| 107306 |
+
"step": 15328
|
| 107307 |
+
},
|
| 107308 |
+
{
|
| 107309 |
+
"epoch": 18.808588957055214,
|
| 107310 |
+
"grad_norm": 0.25926294922828674,
|
| 107311 |
+
"learning_rate": 4.745106437405694e-07,
|
| 107312 |
+
"loss": 0.6030786037445068,
|
| 107313 |
+
"step": 15329
|
| 107314 |
+
},
|
| 107315 |
+
{
|
| 107316 |
+
"epoch": 18.809815950920246,
|
| 107317 |
+
"grad_norm": 0.3052835166454315,
|
| 107318 |
+
"learning_rate": 4.735378790821143e-07,
|
| 107319 |
+
"loss": 0.8870722055435181,
|
| 107320 |
+
"step": 15330
|
| 107321 |
+
},
|
| 107322 |
+
{
|
| 107323 |
+
"epoch": 18.811042944785274,
|
| 107324 |
+
"grad_norm": 0.2827976942062378,
|
| 107325 |
+
"learning_rate": 4.7256610301539827e-07,
|
| 107326 |
+
"loss": 0.5485289692878723,
|
| 107327 |
+
"step": 15331
|
| 107328 |
+
},
|
| 107329 |
+
{
|
| 107330 |
+
"epoch": 18.812269938650306,
|
| 107331 |
+
"grad_norm": 0.2819404602050781,
|
| 107332 |
+
"learning_rate": 4.715953155795871e-07,
|
| 107333 |
+
"loss": 0.7439137697219849,
|
| 107334 |
+
"step": 15332
|
| 107335 |
+
},
|
| 107336 |
+
{
|
| 107337 |
+
"epoch": 18.81349693251534,
|
| 107338 |
+
"grad_norm": 0.22877560555934906,
|
| 107339 |
+
"learning_rate": 4.7062551681381074e-07,
|
| 107340 |
+
"loss": 0.34540295600891113,
|
| 107341 |
+
"step": 15333
|
| 107342 |
+
},
|
| 107343 |
+
{
|
| 107344 |
+
"epoch": 18.814723926380367,
|
| 107345 |
+
"grad_norm": 0.2862103283405304,
|
| 107346 |
+
"learning_rate": 4.696567067571628e-07,
|
| 107347 |
+
"loss": 0.6839549541473389,
|
| 107348 |
+
"step": 15334
|
| 107349 |
+
},
|
| 107350 |
+
{
|
| 107351 |
+
"epoch": 18.8159509202454,
|
| 107352 |
+
"grad_norm": 0.25082194805145264,
|
| 107353 |
+
"learning_rate": 4.6868888544868704e-07,
|
| 107354 |
+
"loss": 0.6146738529205322,
|
| 107355 |
+
"step": 15335
|
| 107356 |
+
},
|
| 107357 |
+
{
|
| 107358 |
+
"epoch": 18.81717791411043,
|
| 107359 |
+
"grad_norm": 0.21773378551006317,
|
| 107360 |
+
"learning_rate": 4.677220529273968e-07,
|
| 107361 |
+
"loss": 0.43376481533050537,
|
| 107362 |
+
"step": 15336
|
| 107363 |
+
},
|
| 107364 |
+
{
|
| 107365 |
+
"epoch": 18.81840490797546,
|
| 107366 |
+
"grad_norm": 0.27115803956985474,
|
| 107367 |
+
"learning_rate": 4.6675620923226357e-07,
|
| 107368 |
+
"loss": 0.4232448637485504,
|
| 107369 |
+
"step": 15337
|
| 107370 |
+
},
|
| 107371 |
+
{
|
| 107372 |
+
"epoch": 18.81963190184049,
|
| 107373 |
+
"grad_norm": 0.2727295756340027,
|
| 107374 |
+
"learning_rate": 4.6579135440221744e-07,
|
| 107375 |
+
"loss": 0.6450777053833008,
|
| 107376 |
+
"step": 15338
|
| 107377 |
+
},
|
| 107378 |
+
{
|
| 107379 |
+
"epoch": 18.82085889570552,
|
| 107380 |
+
"grad_norm": 0.2559374272823334,
|
| 107381 |
+
"learning_rate": 4.6482748847614666e-07,
|
| 107382 |
+
"loss": 0.5385802388191223,
|
| 107383 |
+
"step": 15339
|
| 107384 |
+
},
|
| 107385 |
+
{
|
| 107386 |
+
"epoch": 18.822085889570552,
|
| 107387 |
+
"grad_norm": 0.28574639558792114,
|
| 107388 |
+
"learning_rate": 4.638646114929035e-07,
|
| 107389 |
+
"loss": 0.6534530520439148,
|
| 107390 |
+
"step": 15340
|
| 107391 |
+
},
|
| 107392 |
+
{
|
| 107393 |
+
"epoch": 18.823312883435584,
|
| 107394 |
+
"grad_norm": 0.26913225650787354,
|
| 107395 |
+
"learning_rate": 4.629027234912986e-07,
|
| 107396 |
+
"loss": 0.5204800367355347,
|
| 107397 |
+
"step": 15341
|
| 107398 |
+
},
|
| 107399 |
+
{
|
| 107400 |
+
"epoch": 18.824539877300612,
|
| 107401 |
+
"grad_norm": 0.24075192213058472,
|
| 107402 |
+
"learning_rate": 4.619418245101037e-07,
|
| 107403 |
+
"loss": 0.5587427020072937,
|
| 107404 |
+
"step": 15342
|
| 107405 |
+
},
|
| 107406 |
+
{
|
| 107407 |
+
"epoch": 18.825766871165644,
|
| 107408 |
+
"grad_norm": 0.26131388545036316,
|
| 107409 |
+
"learning_rate": 4.609819145880517e-07,
|
| 107410 |
+
"loss": 0.6891093850135803,
|
| 107411 |
+
"step": 15343
|
| 107412 |
+
},
|
| 107413 |
+
{
|
| 107414 |
+
"epoch": 18.826993865030676,
|
| 107415 |
+
"grad_norm": 0.2937684655189514,
|
| 107416 |
+
"learning_rate": 4.600229937638284e-07,
|
| 107417 |
+
"loss": 0.7604595422744751,
|
| 107418 |
+
"step": 15344
|
| 107419 |
+
},
|
| 107420 |
+
{
|
| 107421 |
+
"epoch": 18.828220858895705,
|
| 107422 |
+
"grad_norm": 0.2711790204048157,
|
| 107423 |
+
"learning_rate": 4.5906506207608614e-07,
|
| 107424 |
+
"loss": 0.6121951341629028,
|
| 107425 |
+
"step": 15345
|
| 107426 |
+
},
|
| 107427 |
+
{
|
| 107428 |
+
"epoch": 18.829447852760737,
|
| 107429 |
+
"grad_norm": 0.25094518065452576,
|
| 107430 |
+
"learning_rate": 4.5810811956344126e-07,
|
| 107431 |
+
"loss": 0.5073522329330444,
|
| 107432 |
+
"step": 15346
|
| 107433 |
+
},
|
| 107434 |
+
{
|
| 107435 |
+
"epoch": 18.830674846625765,
|
| 107436 |
+
"grad_norm": 0.2741771340370178,
|
| 107437 |
+
"learning_rate": 4.571521662644601e-07,
|
| 107438 |
+
"loss": 0.6544768214225769,
|
| 107439 |
+
"step": 15347
|
| 107440 |
+
},
|
| 107441 |
+
{
|
| 107442 |
+
"epoch": 18.831901840490797,
|
| 107443 |
+
"grad_norm": 0.24836848676204681,
|
| 107444 |
+
"learning_rate": 4.561972022176786e-07,
|
| 107445 |
+
"loss": 0.4796122908592224,
|
| 107446 |
+
"step": 15348
|
| 107447 |
+
},
|
| 107448 |
+
{
|
| 107449 |
+
"epoch": 18.83312883435583,
|
| 107450 |
+
"grad_norm": 0.28345876932144165,
|
| 107451 |
+
"learning_rate": 4.552432274615853e-07,
|
| 107452 |
+
"loss": 0.6765681505203247,
|
| 107453 |
+
"step": 15349
|
| 107454 |
+
},
|
| 107455 |
+
{
|
| 107456 |
+
"epoch": 18.834355828220858,
|
| 107457 |
+
"grad_norm": 0.2971680164337158,
|
| 107458 |
+
"learning_rate": 4.5429024203463566e-07,
|
| 107459 |
+
"loss": 0.3718155026435852,
|
| 107460 |
+
"step": 15350
|
| 107461 |
+
},
|
| 107462 |
+
{
|
| 107463 |
+
"epoch": 18.83558282208589,
|
| 107464 |
+
"grad_norm": 0.5132148265838623,
|
| 107465 |
+
"learning_rate": 4.533382459752378e-07,
|
| 107466 |
+
"loss": 0.5887055993080139,
|
| 107467 |
+
"step": 15351
|
| 107468 |
+
},
|
| 107469 |
+
{
|
| 107470 |
+
"epoch": 18.83680981595092,
|
| 107471 |
+
"grad_norm": 0.2500431537628174,
|
| 107472 |
+
"learning_rate": 4.523872393217665e-07,
|
| 107473 |
+
"loss": 0.656025230884552,
|
| 107474 |
+
"step": 15352
|
| 107475 |
+
},
|
| 107476 |
+
{
|
| 107477 |
+
"epoch": 18.83803680981595,
|
| 107478 |
+
"grad_norm": 0.2625158131122589,
|
| 107479 |
+
"learning_rate": 4.514372221125551e-07,
|
| 107480 |
+
"loss": 0.5767805576324463,
|
| 107481 |
+
"step": 15353
|
| 107482 |
+
},
|
| 107483 |
+
{
|
| 107484 |
+
"epoch": 18.839263803680982,
|
| 107485 |
+
"grad_norm": 0.2744707763195038,
|
| 107486 |
+
"learning_rate": 4.504881943858924e-07,
|
| 107487 |
+
"loss": 0.696954607963562,
|
| 107488 |
+
"step": 15354
|
| 107489 |
+
},
|
| 107490 |
+
{
|
| 107491 |
+
"epoch": 18.84049079754601,
|
| 107492 |
+
"grad_norm": 0.2938406467437744,
|
| 107493 |
+
"learning_rate": 4.4954015618003386e-07,
|
| 107494 |
+
"loss": 0.619251549243927,
|
| 107495 |
+
"step": 15355
|
| 107496 |
+
},
|
| 107497 |
+
{
|
| 107498 |
+
"epoch": 18.841717791411043,
|
| 107499 |
+
"grad_norm": 0.248630091547966,
|
| 107500 |
+
"learning_rate": 4.485931075331934e-07,
|
| 107501 |
+
"loss": 0.6851462125778198,
|
| 107502 |
+
"step": 15356
|
| 107503 |
+
},
|
| 107504 |
+
{
|
| 107505 |
+
"epoch": 18.842944785276075,
|
| 107506 |
+
"grad_norm": 0.27372094988822937,
|
| 107507 |
+
"learning_rate": 4.4764704848354046e-07,
|
| 107508 |
+
"loss": 0.6563066244125366,
|
| 107509 |
+
"step": 15357
|
| 107510 |
+
},
|
| 107511 |
+
{
|
| 107512 |
+
"epoch": 18.844171779141103,
|
| 107513 |
+
"grad_norm": 0.26262858510017395,
|
| 107514 |
+
"learning_rate": 4.467019790692084e-07,
|
| 107515 |
+
"loss": 0.693672239780426,
|
| 107516 |
+
"step": 15358
|
| 107517 |
+
},
|
| 107518 |
+
{
|
| 107519 |
+
"epoch": 18.845398773006135,
|
| 107520 |
+
"grad_norm": 0.26992329955101013,
|
| 107521 |
+
"learning_rate": 4.457578993282918e-07,
|
| 107522 |
+
"loss": 0.586786687374115,
|
| 107523 |
+
"step": 15359
|
| 107524 |
+
},
|
| 107525 |
+
{
|
| 107526 |
+
"epoch": 18.846625766871167,
|
| 107527 |
+
"grad_norm": 0.26409637928009033,
|
| 107528 |
+
"learning_rate": 4.4481480929884355e-07,
|
| 107529 |
+
"loss": 0.6994605660438538,
|
| 107530 |
+
"step": 15360
|
| 107531 |
+
},
|
| 107532 |
+
{
|
| 107533 |
+
"epoch": 18.847852760736195,
|
| 107534 |
+
"grad_norm": 0.29597005248069763,
|
| 107535 |
+
"learning_rate": 4.4387270901887766e-07,
|
| 107536 |
+
"loss": 0.6220334768295288,
|
| 107537 |
+
"step": 15361
|
| 107538 |
+
},
|
| 107539 |
+
{
|
| 107540 |
+
"epoch": 18.849079754601227,
|
| 107541 |
+
"grad_norm": 0.2887323498725891,
|
| 107542 |
+
"learning_rate": 4.429315985263666e-07,
|
| 107543 |
+
"loss": 0.5639554262161255,
|
| 107544 |
+
"step": 15362
|
| 107545 |
+
},
|
| 107546 |
+
{
|
| 107547 |
+
"epoch": 18.85030674846626,
|
| 107548 |
+
"grad_norm": 0.2765934467315674,
|
| 107549 |
+
"learning_rate": 4.419914778592438e-07,
|
| 107550 |
+
"loss": 0.46188828349113464,
|
| 107551 |
+
"step": 15363
|
| 107552 |
+
},
|
| 107553 |
+
{
|
| 107554 |
+
"epoch": 18.851533742331288,
|
| 107555 |
+
"grad_norm": 0.23929435014724731,
|
| 107556 |
+
"learning_rate": 4.4105234705540123e-07,
|
| 107557 |
+
"loss": 0.5312220454216003,
|
| 107558 |
+
"step": 15364
|
| 107559 |
+
},
|
| 107560 |
+
{
|
| 107561 |
+
"epoch": 18.85276073619632,
|
| 107562 |
+
"grad_norm": 0.28333696722984314,
|
| 107563 |
+
"learning_rate": 4.4011420615269473e-07,
|
| 107564 |
+
"loss": 0.425798237323761,
|
| 107565 |
+
"step": 15365
|
| 107566 |
+
},
|
| 107567 |
+
{
|
| 107568 |
+
"epoch": 18.85398773006135,
|
| 107569 |
+
"grad_norm": 0.24437132477760315,
|
| 107570 |
+
"learning_rate": 4.391770551889385e-07,
|
| 107571 |
+
"loss": 0.5200620889663696,
|
| 107572 |
+
"step": 15366
|
| 107573 |
+
},
|
| 107574 |
+
{
|
| 107575 |
+
"epoch": 18.85521472392638,
|
| 107576 |
+
"grad_norm": 0.2677897810935974,
|
| 107577 |
+
"learning_rate": 4.382408942019078e-07,
|
| 107578 |
+
"loss": 0.680181622505188,
|
| 107579 |
+
"step": 15367
|
| 107580 |
+
},
|
| 107581 |
+
{
|
| 107582 |
+
"epoch": 18.856441717791412,
|
| 107583 |
+
"grad_norm": 0.27694380283355713,
|
| 107584 |
+
"learning_rate": 4.3730572322933093e-07,
|
| 107585 |
+
"loss": 0.6106482148170471,
|
| 107586 |
+
"step": 15368
|
| 107587 |
+
},
|
| 107588 |
+
{
|
| 107589 |
+
"epoch": 18.85766871165644,
|
| 107590 |
+
"grad_norm": 0.24693812429904938,
|
| 107591 |
+
"learning_rate": 4.363715423089054e-07,
|
| 107592 |
+
"loss": 0.5018847584724426,
|
| 107593 |
+
"step": 15369
|
| 107594 |
+
},
|
| 107595 |
+
{
|
| 107596 |
+
"epoch": 18.858895705521473,
|
| 107597 |
+
"grad_norm": 0.2784872055053711,
|
| 107598 |
+
"learning_rate": 4.3543835147828725e-07,
|
| 107599 |
+
"loss": 0.5582388639450073,
|
| 107600 |
+
"step": 15370
|
| 107601 |
+
},
|
| 107602 |
+
{
|
| 107603 |
+
"epoch": 18.860122699386505,
|
| 107604 |
+
"grad_norm": 0.2811260223388672,
|
| 107605 |
+
"learning_rate": 4.345061507750853e-07,
|
| 107606 |
+
"loss": 0.5976800918579102,
|
| 107607 |
+
"step": 15371
|
| 107608 |
+
},
|
| 107609 |
+
{
|
| 107610 |
+
"epoch": 18.861349693251533,
|
| 107611 |
+
"grad_norm": 0.26563286781311035,
|
| 107612 |
+
"learning_rate": 4.3357494023688326e-07,
|
| 107613 |
+
"loss": 0.5696654319763184,
|
| 107614 |
+
"step": 15372
|
| 107615 |
+
},
|
| 107616 |
+
{
|
| 107617 |
+
"epoch": 18.862576687116565,
|
| 107618 |
+
"grad_norm": 0.2658173143863678,
|
| 107619 |
+
"learning_rate": 4.326447199012068e-07,
|
| 107620 |
+
"loss": 0.5610763430595398,
|
| 107621 |
+
"step": 15373
|
| 107622 |
+
},
|
| 107623 |
+
{
|
| 107624 |
+
"epoch": 18.863803680981594,
|
| 107625 |
+
"grad_norm": 0.24842970073223114,
|
| 107626 |
+
"learning_rate": 4.317154898055509e-07,
|
| 107627 |
+
"loss": 0.6347891092300415,
|
| 107628 |
+
"step": 15374
|
| 107629 |
+
},
|
| 107630 |
+
{
|
| 107631 |
+
"epoch": 18.865030674846626,
|
| 107632 |
+
"grad_norm": 0.24308282136917114,
|
| 107633 |
+
"learning_rate": 4.3078724998737443e-07,
|
| 107634 |
+
"loss": 0.5663868188858032,
|
| 107635 |
+
"step": 15375
|
| 107636 |
+
},
|
| 107637 |
+
{
|
| 107638 |
+
"epoch": 18.866257668711658,
|
| 107639 |
+
"grad_norm": 0.22023813426494598,
|
| 107640 |
+
"learning_rate": 4.2986000048409194e-07,
|
| 107641 |
+
"loss": 0.37273138761520386,
|
| 107642 |
+
"step": 15376
|
| 107643 |
+
},
|
| 107644 |
+
{
|
| 107645 |
+
"epoch": 18.867484662576686,
|
| 107646 |
+
"grad_norm": 0.28790926933288574,
|
| 107647 |
+
"learning_rate": 4.289337413330763e-07,
|
| 107648 |
+
"loss": 0.52947598695755,
|
| 107649 |
+
"step": 15377
|
| 107650 |
+
},
|
| 107651 |
+
{
|
| 107652 |
+
"epoch": 18.868711656441718,
|
| 107653 |
+
"grad_norm": 0.25959306955337524,
|
| 107654 |
+
"learning_rate": 4.280084725716615e-07,
|
| 107655 |
+
"loss": 0.5756335258483887,
|
| 107656 |
+
"step": 15378
|
| 107657 |
+
},
|
| 107658 |
+
{
|
| 107659 |
+
"epoch": 18.86993865030675,
|
| 107660 |
+
"grad_norm": 0.2888113558292389,
|
| 107661 |
+
"learning_rate": 4.2708419423714006e-07,
|
| 107662 |
+
"loss": 0.64225172996521,
|
| 107663 |
+
"step": 15379
|
| 107664 |
+
},
|
| 107665 |
+
{
|
| 107666 |
+
"epoch": 18.87116564417178,
|
| 107667 |
+
"grad_norm": 0.30166149139404297,
|
| 107668 |
+
"learning_rate": 4.2616090636677374e-07,
|
| 107669 |
+
"loss": 0.4458548426628113,
|
| 107670 |
+
"step": 15380
|
| 107671 |
+
},
|
| 107672 |
+
{
|
| 107673 |
+
"epoch": 18.87239263803681,
|
| 107674 |
+
"grad_norm": 0.2879417836666107,
|
| 107675 |
+
"learning_rate": 4.252386089977772e-07,
|
| 107676 |
+
"loss": 0.6099374890327454,
|
| 107677 |
+
"step": 15381
|
| 107678 |
+
},
|
| 107679 |
+
{
|
| 107680 |
+
"epoch": 18.87361963190184,
|
| 107681 |
+
"grad_norm": 0.2846284508705139,
|
| 107682 |
+
"learning_rate": 4.243173021673208e-07,
|
| 107683 |
+
"loss": 0.7146259546279907,
|
| 107684 |
+
"step": 15382
|
| 107685 |
+
},
|
| 107686 |
+
{
|
| 107687 |
+
"epoch": 18.87484662576687,
|
| 107688 |
+
"grad_norm": 0.2766912579536438,
|
| 107689 |
+
"learning_rate": 4.233969859125414e-07,
|
| 107690 |
+
"loss": 0.6318878531455994,
|
| 107691 |
+
"step": 15383
|
| 107692 |
+
},
|
| 107693 |
+
{
|
| 107694 |
+
"epoch": 18.876073619631903,
|
| 107695 |
+
"grad_norm": 0.2662307024002075,
|
| 107696 |
+
"learning_rate": 4.224776602705371e-07,
|
| 107697 |
+
"loss": 0.6410751938819885,
|
| 107698 |
+
"step": 15384
|
| 107699 |
+
},
|
| 107700 |
+
{
|
| 107701 |
+
"epoch": 18.87730061349693,
|
| 107702 |
+
"grad_norm": 0.28768855333328247,
|
| 107703 |
+
"learning_rate": 4.2155932527835897e-07,
|
| 107704 |
+
"loss": 0.6313656568527222,
|
| 107705 |
+
"step": 15385
|
| 107706 |
+
},
|
| 107707 |
+
{
|
| 107708 |
+
"epoch": 18.878527607361963,
|
| 107709 |
+
"grad_norm": 0.2634570896625519,
|
| 107710 |
+
"learning_rate": 4.206419809730244e-07,
|
| 107711 |
+
"loss": 0.5571842193603516,
|
| 107712 |
+
"step": 15386
|
| 107713 |
+
},
|
| 107714 |
+
{
|
| 107715 |
+
"epoch": 18.879754601226995,
|
| 107716 |
+
"grad_norm": 0.2558663487434387,
|
| 107717 |
+
"learning_rate": 4.1972562739150957e-07,
|
| 107718 |
+
"loss": 0.6955661177635193,
|
| 107719 |
+
"step": 15387
|
| 107720 |
+
},
|
| 107721 |
+
{
|
| 107722 |
+
"epoch": 18.880981595092024,
|
| 107723 |
+
"grad_norm": 0.2200579047203064,
|
| 107724 |
+
"learning_rate": 4.188102645707487e-07,
|
| 107725 |
+
"loss": 0.41217517852783203,
|
| 107726 |
+
"step": 15388
|
| 107727 |
+
},
|
| 107728 |
+
{
|
| 107729 |
+
"epoch": 18.882208588957056,
|
| 107730 |
+
"grad_norm": 0.2848440408706665,
|
| 107731 |
+
"learning_rate": 4.178958925476401e-07,
|
| 107732 |
+
"loss": 0.6014900207519531,
|
| 107733 |
+
"step": 15389
|
| 107734 |
+
},
|
| 107735 |
+
{
|
| 107736 |
+
"epoch": 18.883435582822084,
|
| 107737 |
+
"grad_norm": 0.22961875796318054,
|
| 107738 |
+
"learning_rate": 4.1698251135903754e-07,
|
| 107739 |
+
"loss": 0.24417629837989807,
|
| 107740 |
+
"step": 15390
|
| 107741 |
+
},
|
| 107742 |
+
{
|
| 107743 |
+
"epoch": 18.884662576687116,
|
| 107744 |
+
"grad_norm": 0.25900471210479736,
|
| 107745 |
+
"learning_rate": 4.1607012104175614e-07,
|
| 107746 |
+
"loss": 0.4764086902141571,
|
| 107747 |
+
"step": 15391
|
| 107748 |
+
},
|
| 107749 |
+
{
|
| 107750 |
+
"epoch": 18.88588957055215,
|
| 107751 |
+
"grad_norm": 0.2680763602256775,
|
| 107752 |
+
"learning_rate": 4.1515872163257197e-07,
|
| 107753 |
+
"loss": 0.7528952956199646,
|
| 107754 |
+
"step": 15392
|
| 107755 |
+
},
|
| 107756 |
+
{
|
| 107757 |
+
"epoch": 18.887116564417177,
|
| 107758 |
+
"grad_norm": 0.25390854477882385,
|
| 107759 |
+
"learning_rate": 4.1424831316822235e-07,
|
| 107760 |
+
"loss": 0.48829060792922974,
|
| 107761 |
+
"step": 15393
|
| 107762 |
+
},
|
| 107763 |
+
{
|
| 107764 |
+
"epoch": 18.88834355828221,
|
| 107765 |
+
"grad_norm": 0.2617965340614319,
|
| 107766 |
+
"learning_rate": 4.1333889568540284e-07,
|
| 107767 |
+
"loss": 0.566694974899292,
|
| 107768 |
+
"step": 15394
|
| 107769 |
+
},
|
| 107770 |
+
{
|
| 107771 |
+
"epoch": 18.88957055214724,
|
| 107772 |
+
"grad_norm": 0.24462890625,
|
| 107773 |
+
"learning_rate": 4.1243046922076755e-07,
|
| 107774 |
+
"loss": 0.49365583062171936,
|
| 107775 |
+
"step": 15395
|
| 107776 |
+
},
|
| 107777 |
+
{
|
| 107778 |
+
"epoch": 18.89079754601227,
|
| 107779 |
+
"grad_norm": 0.2941041886806488,
|
| 107780 |
+
"learning_rate": 4.1152303381093713e-07,
|
| 107781 |
+
"loss": 0.7954063415527344,
|
| 107782 |
+
"step": 15396
|
| 107783 |
+
},
|
| 107784 |
+
{
|
| 107785 |
+
"epoch": 18.8920245398773,
|
| 107786 |
+
"grad_norm": 0.26340603828430176,
|
| 107787 |
+
"learning_rate": 4.106165894924824e-07,
|
| 107788 |
+
"loss": 0.6104364395141602,
|
| 107789 |
+
"step": 15397
|
| 107790 |
+
},
|
| 107791 |
+
{
|
| 107792 |
+
"epoch": 18.89325153374233,
|
| 107793 |
+
"grad_norm": 0.3128635585308075,
|
| 107794 |
+
"learning_rate": 4.0971113630194345e-07,
|
| 107795 |
+
"loss": 0.6848822832107544,
|
| 107796 |
+
"step": 15398
|
| 107797 |
+
},
|
| 107798 |
+
{
|
| 107799 |
+
"epoch": 18.89447852760736,
|
| 107800 |
+
"grad_norm": 0.3111709654331207,
|
| 107801 |
+
"learning_rate": 4.0880667427581063e-07,
|
| 107802 |
+
"loss": 0.5258172750473022,
|
| 107803 |
+
"step": 15399
|
| 107804 |
+
},
|
| 107805 |
+
{
|
| 107806 |
+
"epoch": 18.895705521472394,
|
| 107807 |
+
"grad_norm": 0.2768288850784302,
|
| 107808 |
+
"learning_rate": 4.0790320345054923e-07,
|
| 107809 |
+
"loss": 0.5429795384407043,
|
| 107810 |
+
"step": 15400
|
| 107811 |
+
},
|
| 107812 |
+
{
|
| 107813 |
+
"epoch": 18.896932515337422,
|
| 107814 |
+
"grad_norm": 0.24435095489025116,
|
| 107815 |
+
"learning_rate": 4.070007238625689e-07,
|
| 107816 |
+
"loss": 0.4184759855270386,
|
| 107817 |
+
"step": 15401
|
| 107818 |
+
},
|
| 107819 |
+
{
|
| 107820 |
+
"epoch": 18.898159509202454,
|
| 107821 |
+
"grad_norm": 0.26221925020217896,
|
| 107822 |
+
"learning_rate": 4.0609923554824625e-07,
|
| 107823 |
+
"loss": 0.5773870944976807,
|
| 107824 |
+
"step": 15402
|
| 107825 |
+
},
|
| 107826 |
+
{
|
| 107827 |
+
"epoch": 18.899386503067486,
|
| 107828 |
+
"grad_norm": 0.2912936210632324,
|
| 107829 |
+
"learning_rate": 4.0519873854392155e-07,
|
| 107830 |
+
"loss": 0.8390700221061707,
|
| 107831 |
+
"step": 15403
|
| 107832 |
+
},
|
| 107833 |
+
{
|
| 107834 |
+
"epoch": 18.900613496932515,
|
| 107835 |
+
"grad_norm": 0.30612891912460327,
|
| 107836 |
+
"learning_rate": 4.04299232885888e-07,
|
| 107837 |
+
"loss": 0.74934983253479,
|
| 107838 |
+
"step": 15404
|
| 107839 |
+
},
|
| 107840 |
+
{
|
| 107841 |
+
"epoch": 18.901840490797547,
|
| 107842 |
+
"grad_norm": 0.276533842086792,
|
| 107843 |
+
"learning_rate": 4.034007186104055e-07,
|
| 107844 |
+
"loss": 0.7439364194869995,
|
| 107845 |
+
"step": 15405
|
| 107846 |
+
},
|
| 107847 |
+
{
|
| 107848 |
+
"epoch": 18.903067484662575,
|
| 107849 |
+
"grad_norm": 0.2950587570667267,
|
| 107850 |
+
"learning_rate": 4.025031957536868e-07,
|
| 107851 |
+
"loss": 0.664257287979126,
|
| 107852 |
+
"step": 15406
|
| 107853 |
+
},
|
| 107854 |
+
{
|
| 107855 |
+
"epoch": 18.904294478527607,
|
| 107856 |
+
"grad_norm": 0.22966299951076508,
|
| 107857 |
+
"learning_rate": 4.016066643519112e-07,
|
| 107858 |
+
"loss": 0.46799468994140625,
|
| 107859 |
+
"step": 15407
|
| 107860 |
+
},
|
| 107861 |
+
{
|
| 107862 |
+
"epoch": 18.90552147239264,
|
| 107863 |
+
"grad_norm": 0.2641526758670807,
|
| 107864 |
+
"learning_rate": 4.0071112444121374e-07,
|
| 107865 |
+
"loss": 0.718100905418396,
|
| 107866 |
+
"step": 15408
|
| 107867 |
+
},
|
| 107868 |
+
{
|
| 107869 |
+
"epoch": 18.906748466257667,
|
| 107870 |
+
"grad_norm": 0.24755476415157318,
|
| 107871 |
+
"learning_rate": 3.998165760576905e-07,
|
| 107872 |
+
"loss": 0.47016048431396484,
|
| 107873 |
+
"step": 15409
|
| 107874 |
+
},
|
| 107875 |
+
{
|
| 107876 |
+
"epoch": 18.9079754601227,
|
| 107877 |
+
"grad_norm": 0.24538715183734894,
|
| 107878 |
+
"learning_rate": 3.9892301923739884e-07,
|
| 107879 |
+
"loss": 0.39219364523887634,
|
| 107880 |
+
"step": 15410
|
| 107881 |
+
},
|
| 107882 |
+
{
|
| 107883 |
+
"epoch": 18.90920245398773,
|
| 107884 |
+
"grad_norm": 0.2751717269420624,
|
| 107885 |
+
"learning_rate": 3.980304540163571e-07,
|
| 107886 |
+
"loss": 0.5266256332397461,
|
| 107887 |
+
"step": 15411
|
| 107888 |
+
},
|
| 107889 |
+
{
|
| 107890 |
+
"epoch": 18.91042944785276,
|
| 107891 |
+
"grad_norm": 0.27194634079933167,
|
| 107892 |
+
"learning_rate": 3.9713888043053926e-07,
|
| 107893 |
+
"loss": 0.7351025342941284,
|
| 107894 |
+
"step": 15412
|
| 107895 |
+
},
|
| 107896 |
+
{
|
| 107897 |
+
"epoch": 18.911656441717792,
|
| 107898 |
+
"grad_norm": 0.2860781252384186,
|
| 107899 |
+
"learning_rate": 3.962482985158861e-07,
|
| 107900 |
+
"loss": 0.6614590287208557,
|
| 107901 |
+
"step": 15413
|
| 107902 |
+
},
|
| 107903 |
+
{
|
| 107904 |
+
"epoch": 18.91288343558282,
|
| 107905 |
+
"grad_norm": 0.2399091124534607,
|
| 107906 |
+
"learning_rate": 3.9535870830828827e-07,
|
| 107907 |
+
"loss": 0.4663470685482025,
|
| 107908 |
+
"step": 15414
|
| 107909 |
+
},
|
| 107910 |
+
{
|
| 107911 |
+
"epoch": 18.914110429447852,
|
| 107912 |
+
"grad_norm": 0.25810569524765015,
|
| 107913 |
+
"learning_rate": 3.9447010984361155e-07,
|
| 107914 |
+
"loss": 0.575016975402832,
|
| 107915 |
+
"step": 15415
|
| 107916 |
+
},
|
| 107917 |
+
{
|
| 107918 |
+
"epoch": 18.915337423312884,
|
| 107919 |
+
"grad_norm": 0.4573909640312195,
|
| 107920 |
+
"learning_rate": 3.935825031576634e-07,
|
| 107921 |
+
"loss": 0.5934346914291382,
|
| 107922 |
+
"step": 15416
|
| 107923 |
+
},
|
| 107924 |
+
{
|
| 107925 |
+
"epoch": 18.916564417177913,
|
| 107926 |
+
"grad_norm": 0.2702994644641876,
|
| 107927 |
+
"learning_rate": 3.926958882862264e-07,
|
| 107928 |
+
"loss": 0.5717799067497253,
|
| 107929 |
+
"step": 15417
|
| 107930 |
+
},
|
| 107931 |
+
{
|
| 107932 |
+
"epoch": 18.917791411042945,
|
| 107933 |
+
"grad_norm": 0.2768639922142029,
|
| 107934 |
+
"learning_rate": 3.918102652650329e-07,
|
| 107935 |
+
"loss": 0.5957989692687988,
|
| 107936 |
+
"step": 15418
|
| 107937 |
+
},
|
| 107938 |
+
{
|
| 107939 |
+
"epoch": 18.919018404907977,
|
| 107940 |
+
"grad_norm": 0.2596666216850281,
|
| 107941 |
+
"learning_rate": 3.909256341297851e-07,
|
| 107942 |
+
"loss": 0.5739258527755737,
|
| 107943 |
+
"step": 15419
|
| 107944 |
+
},
|
| 107945 |
+
{
|
| 107946 |
+
"epoch": 18.920245398773005,
|
| 107947 |
+
"grad_norm": 0.27424895763397217,
|
| 107948 |
+
"learning_rate": 3.9004199491614046e-07,
|
| 107949 |
+
"loss": 0.4615115225315094,
|
| 107950 |
+
"step": 15420
|
| 107951 |
+
},
|
| 107952 |
+
{
|
| 107953 |
+
"epoch": 18.921472392638037,
|
| 107954 |
+
"grad_norm": 0.4937061369419098,
|
| 107955 |
+
"learning_rate": 3.891593476597094e-07,
|
| 107956 |
+
"loss": 0.7324157357215881,
|
| 107957 |
+
"step": 15421
|
| 107958 |
+
},
|
| 107959 |
+
{
|
| 107960 |
+
"epoch": 18.92269938650307,
|
| 107961 |
+
"grad_norm": 0.25138941407203674,
|
| 107962 |
+
"learning_rate": 3.882776923960746e-07,
|
| 107963 |
+
"loss": 0.5318384170532227,
|
| 107964 |
+
"step": 15422
|
| 107965 |
+
},
|
| 107966 |
+
{
|
| 107967 |
+
"epoch": 18.923926380368098,
|
| 107968 |
+
"grad_norm": 0.27020174264907837,
|
| 107969 |
+
"learning_rate": 3.8739702916077147e-07,
|
| 107970 |
+
"loss": 0.7053074836730957,
|
| 107971 |
+
"step": 15423
|
| 107972 |
+
},
|
| 107973 |
+
{
|
| 107974 |
+
"epoch": 18.92515337423313,
|
| 107975 |
+
"grad_norm": 0.2275635153055191,
|
| 107976 |
+
"learning_rate": 3.8651735798929387e-07,
|
| 107977 |
+
"loss": 0.4795500636100769,
|
| 107978 |
+
"step": 15424
|
| 107979 |
+
},
|
| 107980 |
+
{
|
| 107981 |
+
"epoch": 18.926380368098158,
|
| 107982 |
+
"grad_norm": 0.2655569016933441,
|
| 107983 |
+
"learning_rate": 3.8563867891710234e-07,
|
| 107984 |
+
"loss": 0.508735716342926,
|
| 107985 |
+
"step": 15425
|
| 107986 |
+
},
|
| 107987 |
+
{
|
| 107988 |
+
"epoch": 18.92760736196319,
|
| 107989 |
+
"grad_norm": 0.269887238740921,
|
| 107990 |
+
"learning_rate": 3.847609919796158e-07,
|
| 107991 |
+
"loss": 0.6576066017150879,
|
| 107992 |
+
"step": 15426
|
| 107993 |
+
},
|
| 107994 |
+
{
|
| 107995 |
+
"epoch": 18.928834355828222,
|
| 107996 |
+
"grad_norm": 0.28394457697868347,
|
| 107997 |
+
"learning_rate": 3.838842972122059e-07,
|
| 107998 |
+
"loss": 0.6434745788574219,
|
| 107999 |
+
"step": 15427
|
| 108000 |
+
},
|
| 108001 |
+
{
|
| 108002 |
+
"epoch": 18.93006134969325,
|
| 108003 |
+
"grad_norm": 0.25400447845458984,
|
| 108004 |
+
"learning_rate": 3.8300859465021655e-07,
|
| 108005 |
+
"loss": 0.4900987148284912,
|
| 108006 |
+
"step": 15428
|
| 108007 |
+
},
|
| 108008 |
+
{
|
| 108009 |
+
"epoch": 18.931288343558283,
|
| 108010 |
+
"grad_norm": 0.2823043763637543,
|
| 108011 |
+
"learning_rate": 3.8213388432893625e-07,
|
| 108012 |
+
"loss": 0.6874721050262451,
|
| 108013 |
+
"step": 15429
|
| 108014 |
+
},
|
| 108015 |
+
{
|
| 108016 |
+
"epoch": 18.93251533742331,
|
| 108017 |
+
"grad_norm": 0.2400689274072647,
|
| 108018 |
+
"learning_rate": 3.8126016628363124e-07,
|
| 108019 |
+
"loss": 0.5242182016372681,
|
| 108020 |
+
"step": 15430
|
| 108021 |
+
},
|
| 108022 |
+
{
|
| 108023 |
+
"epoch": 18.933742331288343,
|
| 108024 |
+
"grad_norm": 0.18596012890338898,
|
| 108025 |
+
"learning_rate": 3.8038744054951227e-07,
|
| 108026 |
+
"loss": 0.21262076497077942,
|
| 108027 |
+
"step": 15431
|
| 108028 |
+
},
|
| 108029 |
+
{
|
| 108030 |
+
"epoch": 18.934969325153375,
|
| 108031 |
+
"grad_norm": 0.28190475702285767,
|
| 108032 |
+
"learning_rate": 3.7951570716175666e-07,
|
| 108033 |
+
"loss": 0.6376181840896606,
|
| 108034 |
+
"step": 15432
|
| 108035 |
+
},
|
| 108036 |
+
{
|
| 108037 |
+
"epoch": 18.936196319018403,
|
| 108038 |
+
"grad_norm": 0.24646449089050293,
|
| 108039 |
+
"learning_rate": 3.786449661555058e-07,
|
| 108040 |
+
"loss": 0.5473042726516724,
|
| 108041 |
+
"step": 15433
|
| 108042 |
+
},
|
| 108043 |
+
{
|
| 108044 |
+
"epoch": 18.937423312883435,
|
| 108045 |
+
"grad_norm": 0.27783259749412537,
|
| 108046 |
+
"learning_rate": 3.777752175658511e-07,
|
| 108047 |
+
"loss": 0.5992516279220581,
|
| 108048 |
+
"step": 15434
|
| 108049 |
+
},
|
| 108050 |
+
{
|
| 108051 |
+
"epoch": 18.938650306748468,
|
| 108052 |
+
"grad_norm": 0.2646479308605194,
|
| 108053 |
+
"learning_rate": 3.769064614278561e-07,
|
| 108054 |
+
"loss": 0.5966908931732178,
|
| 108055 |
+
"step": 15435
|
| 108056 |
+
},
|
| 108057 |
+
{
|
| 108058 |
+
"epoch": 18.939877300613496,
|
| 108059 |
+
"grad_norm": 0.2635233700275421,
|
| 108060 |
+
"learning_rate": 3.7603869777653176e-07,
|
| 108061 |
+
"loss": 0.4744076430797577,
|
| 108062 |
+
"step": 15436
|
| 108063 |
+
},
|
| 108064 |
+
{
|
| 108065 |
+
"epoch": 18.941104294478528,
|
| 108066 |
+
"grad_norm": 0.2693670988082886,
|
| 108067 |
+
"learning_rate": 3.751719266468584e-07,
|
| 108068 |
+
"loss": 0.6023712754249573,
|
| 108069 |
+
"step": 15437
|
| 108070 |
+
},
|
| 108071 |
+
{
|
| 108072 |
+
"epoch": 18.94233128834356,
|
| 108073 |
+
"grad_norm": 0.26635730266571045,
|
| 108074 |
+
"learning_rate": 3.7430614807377194e-07,
|
| 108075 |
+
"loss": 0.5202741622924805,
|
| 108076 |
+
"step": 15438
|
| 108077 |
+
},
|
| 108078 |
+
{
|
| 108079 |
+
"epoch": 18.94355828220859,
|
| 108080 |
+
"grad_norm": 0.24820347130298615,
|
| 108081 |
+
"learning_rate": 3.734413620921695e-07,
|
| 108082 |
+
"loss": 0.6180890798568726,
|
| 108083 |
+
"step": 15439
|
| 108084 |
+
},
|
| 108085 |
+
{
|
| 108086 |
+
"epoch": 18.94478527607362,
|
| 108087 |
+
"grad_norm": 0.28504833579063416,
|
| 108088 |
+
"learning_rate": 3.725775687369121e-07,
|
| 108089 |
+
"loss": 0.614676833152771,
|
| 108090 |
+
"step": 15440
|
| 108091 |
+
},
|
| 108092 |
+
{
|
| 108093 |
+
"epoch": 18.94601226993865,
|
| 108094 |
+
"grad_norm": 0.26070815324783325,
|
| 108095 |
+
"learning_rate": 3.717147680428107e-07,
|
| 108096 |
+
"loss": 0.590442419052124,
|
| 108097 |
+
"step": 15441
|
| 108098 |
+
},
|
| 108099 |
+
{
|
| 108100 |
+
"epoch": 18.94723926380368,
|
| 108101 |
+
"grad_norm": 0.25560688972473145,
|
| 108102 |
+
"learning_rate": 3.708529600446459e-07,
|
| 108103 |
+
"loss": 0.4581637978553772,
|
| 108104 |
+
"step": 15442
|
| 108105 |
+
},
|
| 108106 |
+
{
|
| 108107 |
+
"epoch": 18.948466257668713,
|
| 108108 |
+
"grad_norm": 0.2697610557079315,
|
| 108109 |
+
"learning_rate": 3.699921447771509e-07,
|
| 108110 |
+
"loss": 0.36843010783195496,
|
| 108111 |
+
"step": 15443
|
| 108112 |
+
},
|
| 108113 |
+
{
|
| 108114 |
+
"epoch": 18.94969325153374,
|
| 108115 |
+
"grad_norm": 0.2566494345664978,
|
| 108116 |
+
"learning_rate": 3.691323222750287e-07,
|
| 108117 |
+
"loss": 0.6538009643554688,
|
| 108118 |
+
"step": 15444
|
| 108119 |
+
},
|
| 108120 |
+
{
|
| 108121 |
+
"epoch": 18.950920245398773,
|
| 108122 |
+
"grad_norm": 0.2448330670595169,
|
| 108123 |
+
"learning_rate": 3.68273492572932e-07,
|
| 108124 |
+
"loss": 0.4754892587661743,
|
| 108125 |
+
"step": 15445
|
| 108126 |
+
},
|
| 108127 |
+
{
|
| 108128 |
+
"epoch": 18.952147239263805,
|
| 108129 |
+
"grad_norm": 0.2714025676250458,
|
| 108130 |
+
"learning_rate": 3.6741565570547755e-07,
|
| 108131 |
+
"loss": 0.6130117177963257,
|
| 108132 |
+
"step": 15446
|
| 108133 |
+
},
|
| 108134 |
+
{
|
| 108135 |
+
"epoch": 18.953374233128834,
|
| 108136 |
+
"grad_norm": 0.2826921045780182,
|
| 108137 |
+
"learning_rate": 3.6655881170724604e-07,
|
| 108138 |
+
"loss": 0.7097179293632507,
|
| 108139 |
+
"step": 15447
|
| 108140 |
+
},
|
| 108141 |
+
{
|
| 108142 |
+
"epoch": 18.954601226993866,
|
| 108143 |
+
"grad_norm": 0.2563024163246155,
|
| 108144 |
+
"learning_rate": 3.65702960612771e-07,
|
| 108145 |
+
"loss": 0.5145770907402039,
|
| 108146 |
+
"step": 15448
|
| 108147 |
+
},
|
| 108148 |
+
{
|
| 108149 |
+
"epoch": 18.955828220858894,
|
| 108150 |
+
"grad_norm": 0.2801174521446228,
|
| 108151 |
+
"learning_rate": 3.6484810245655254e-07,
|
| 108152 |
+
"loss": 0.598757803440094,
|
| 108153 |
+
"step": 15449
|
| 108154 |
+
},
|
| 108155 |
+
{
|
| 108156 |
+
"epoch": 18.957055214723926,
|
| 108157 |
+
"grad_norm": 0.26695194840431213,
|
| 108158 |
+
"learning_rate": 3.6399423727304095e-07,
|
| 108159 |
+
"loss": 0.5180026292800903,
|
| 108160 |
+
"step": 15450
|
| 108161 |
+
},
|
| 108162 |
+
{
|
| 108163 |
+
"epoch": 18.958282208588958,
|
| 108164 |
+
"grad_norm": 0.2266158014535904,
|
| 108165 |
+
"learning_rate": 3.631413650966614e-07,
|
| 108166 |
+
"loss": 0.4648008346557617,
|
| 108167 |
+
"step": 15451
|
| 108168 |
+
},
|
| 108169 |
+
{
|
| 108170 |
+
"epoch": 18.959509202453987,
|
| 108171 |
+
"grad_norm": 0.2749348282814026,
|
| 108172 |
+
"learning_rate": 3.622894859617837e-07,
|
| 108173 |
+
"loss": 0.6426236629486084,
|
| 108174 |
+
"step": 15452
|
| 108175 |
+
},
|
| 108176 |
+
{
|
| 108177 |
+
"epoch": 18.96073619631902,
|
| 108178 |
+
"grad_norm": 0.27203667163848877,
|
| 108179 |
+
"learning_rate": 3.6143859990274975e-07,
|
| 108180 |
+
"loss": 0.5754863023757935,
|
| 108181 |
+
"step": 15453
|
| 108182 |
+
},
|
| 108183 |
+
{
|
| 108184 |
+
"epoch": 18.96196319018405,
|
| 108185 |
+
"grad_norm": 0.2672336995601654,
|
| 108186 |
+
"learning_rate": 3.6058870695385714e-07,
|
| 108187 |
+
"loss": 0.5096818208694458,
|
| 108188 |
+
"step": 15454
|
| 108189 |
+
},
|
| 108190 |
+
{
|
| 108191 |
+
"epoch": 18.96319018404908,
|
| 108192 |
+
"grad_norm": 0.280908465385437,
|
| 108193 |
+
"learning_rate": 3.5973980714935627e-07,
|
| 108194 |
+
"loss": 0.626288652420044,
|
| 108195 |
+
"step": 15455
|
| 108196 |
+
},
|
| 108197 |
+
{
|
| 108198 |
+
"epoch": 18.96441717791411,
|
| 108199 |
+
"grad_norm": 0.29029160737991333,
|
| 108200 |
+
"learning_rate": 3.58891900523467e-07,
|
| 108201 |
+
"loss": 0.760712742805481,
|
| 108202 |
+
"step": 15456
|
| 108203 |
+
},
|
| 108204 |
+
{
|
| 108205 |
+
"epoch": 18.96564417177914,
|
| 108206 |
+
"grad_norm": 0.24727702140808105,
|
| 108207 |
+
"learning_rate": 3.580449871103703e-07,
|
| 108208 |
+
"loss": 0.444758802652359,
|
| 108209 |
+
"step": 15457
|
| 108210 |
+
},
|
| 108211 |
+
{
|
| 108212 |
+
"epoch": 18.96687116564417,
|
| 108213 |
+
"grad_norm": 0.31069016456604004,
|
| 108214 |
+
"learning_rate": 3.5719906694419435e-07,
|
| 108215 |
+
"loss": 0.6165425181388855,
|
| 108216 |
+
"step": 15458
|
| 108217 |
+
},
|
| 108218 |
+
{
|
| 108219 |
+
"epoch": 18.968098159509204,
|
| 108220 |
+
"grad_norm": 0.27184250950813293,
|
| 108221 |
+
"learning_rate": 3.563541400590453e-07,
|
| 108222 |
+
"loss": 0.7477340698242188,
|
| 108223 |
+
"step": 15459
|
| 108224 |
+
},
|
| 108225 |
+
{
|
| 108226 |
+
"epoch": 18.969325153374232,
|
| 108227 |
+
"grad_norm": 0.2867529094219208,
|
| 108228 |
+
"learning_rate": 3.555102064889737e-07,
|
| 108229 |
+
"loss": 0.594327986240387,
|
| 108230 |
+
"step": 15460
|
| 108231 |
+
},
|
| 108232 |
+
{
|
| 108233 |
+
"epoch": 18.970552147239264,
|
| 108234 |
+
"grad_norm": 0.28202736377716064,
|
| 108235 |
+
"learning_rate": 3.5466726626799675e-07,
|
| 108236 |
+
"loss": 0.5788388252258301,
|
| 108237 |
+
"step": 15461
|
| 108238 |
+
},
|
| 108239 |
+
{
|
| 108240 |
+
"epoch": 18.971779141104296,
|
| 108241 |
+
"grad_norm": 0.22165407240390778,
|
| 108242 |
+
"learning_rate": 3.5382531943009003e-07,
|
| 108243 |
+
"loss": 0.38775181770324707,
|
| 108244 |
+
"step": 15462
|
| 108245 |
+
},
|
| 108246 |
+
{
|
| 108247 |
+
"epoch": 18.973006134969324,
|
| 108248 |
+
"grad_norm": 0.2565503418445587,
|
| 108249 |
+
"learning_rate": 3.5298436600919304e-07,
|
| 108250 |
+
"loss": 0.5452792644500732,
|
| 108251 |
+
"step": 15463
|
| 108252 |
+
},
|
| 108253 |
+
{
|
| 108254 |
+
"epoch": 18.974233128834356,
|
| 108255 |
+
"grad_norm": 0.2809341847896576,
|
| 108256 |
+
"learning_rate": 3.5214440603920097e-07,
|
| 108257 |
+
"loss": 0.7361241579055786,
|
| 108258 |
+
"step": 15464
|
| 108259 |
+
},
|
| 108260 |
+
{
|
| 108261 |
+
"epoch": 18.975460122699385,
|
| 108262 |
+
"grad_norm": 0.29712262749671936,
|
| 108263 |
+
"learning_rate": 3.5130543955397e-07,
|
| 108264 |
+
"loss": 0.7266162633895874,
|
| 108265 |
+
"step": 15465
|
| 108266 |
+
},
|
| 108267 |
+
{
|
| 108268 |
+
"epoch": 18.976687116564417,
|
| 108269 |
+
"grad_norm": 0.2793950140476227,
|
| 108270 |
+
"learning_rate": 3.5046746658731476e-07,
|
| 108271 |
+
"loss": 0.7207373976707458,
|
| 108272 |
+
"step": 15466
|
| 108273 |
+
},
|
| 108274 |
+
{
|
| 108275 |
+
"epoch": 18.97791411042945,
|
| 108276 |
+
"grad_norm": 0.2632257640361786,
|
| 108277 |
+
"learning_rate": 3.496304871730166e-07,
|
| 108278 |
+
"loss": 0.6137797236442566,
|
| 108279 |
+
"step": 15467
|
| 108280 |
+
},
|
| 108281 |
+
{
|
| 108282 |
+
"epoch": 18.979141104294477,
|
| 108283 |
+
"grad_norm": 0.2478286474943161,
|
| 108284 |
+
"learning_rate": 3.487945013448041e-07,
|
| 108285 |
+
"loss": 0.4830426573753357,
|
| 108286 |
+
"step": 15468
|
| 108287 |
+
},
|
| 108288 |
+
{
|
| 108289 |
+
"epoch": 18.98036809815951,
|
| 108290 |
+
"grad_norm": 0.26361966133117676,
|
| 108291 |
+
"learning_rate": 3.4795950913638074e-07,
|
| 108292 |
+
"loss": 0.6802637577056885,
|
| 108293 |
+
"step": 15469
|
| 108294 |
+
},
|
| 108295 |
+
{
|
| 108296 |
+
"epoch": 18.98159509202454,
|
| 108297 |
+
"grad_norm": 0.25063711404800415,
|
| 108298 |
+
"learning_rate": 3.471255105814003e-07,
|
| 108299 |
+
"loss": 0.507027804851532,
|
| 108300 |
+
"step": 15470
|
| 108301 |
+
},
|
| 108302 |
+
{
|
| 108303 |
+
"epoch": 18.98282208588957,
|
| 108304 |
+
"grad_norm": 0.2972632646560669,
|
| 108305 |
+
"learning_rate": 3.462925057134747e-07,
|
| 108306 |
+
"loss": 0.6897913813591003,
|
| 108307 |
+
"step": 15471
|
| 108308 |
+
},
|
| 108309 |
+
{
|
| 108310 |
+
"epoch": 18.9840490797546,
|
| 108311 |
+
"grad_norm": 0.2530694603919983,
|
| 108312 |
+
"learning_rate": 3.4546049456618823e-07,
|
| 108313 |
+
"loss": 0.5957586169242859,
|
| 108314 |
+
"step": 15472
|
| 108315 |
+
},
|
| 108316 |
+
{
|
| 108317 |
+
"epoch": 18.98527607361963,
|
| 108318 |
+
"grad_norm": 0.2942049503326416,
|
| 108319 |
+
"learning_rate": 3.446294771730696e-07,
|
| 108320 |
+
"loss": 0.5989881157875061,
|
| 108321 |
+
"step": 15473
|
| 108322 |
+
},
|
| 108323 |
+
{
|
| 108324 |
+
"epoch": 18.986503067484662,
|
| 108325 |
+
"grad_norm": 0.26282650232315063,
|
| 108326 |
+
"learning_rate": 3.437994535676198e-07,
|
| 108327 |
+
"loss": 0.4580558240413666,
|
| 108328 |
+
"step": 15474
|
| 108329 |
+
},
|
| 108330 |
+
{
|
| 108331 |
+
"epoch": 18.987730061349694,
|
| 108332 |
+
"grad_norm": 0.31557324528694153,
|
| 108333 |
+
"learning_rate": 3.429704237832898e-07,
|
| 108334 |
+
"loss": 0.5486705303192139,
|
| 108335 |
+
"step": 15475
|
| 108336 |
+
},
|
| 108337 |
+
{
|
| 108338 |
+
"epoch": 18.988957055214723,
|
| 108339 |
+
"grad_norm": 0.3002515435218811,
|
| 108340 |
+
"learning_rate": 3.421423878534974e-07,
|
| 108341 |
+
"loss": 0.7435135841369629,
|
| 108342 |
+
"step": 15476
|
| 108343 |
+
},
|
| 108344 |
+
{
|
| 108345 |
+
"epoch": 18.990184049079755,
|
| 108346 |
+
"grad_norm": 0.26372063159942627,
|
| 108347 |
+
"learning_rate": 3.413153458116214e-07,
|
| 108348 |
+
"loss": 0.521518886089325,
|
| 108349 |
+
"step": 15477
|
| 108350 |
+
},
|
| 108351 |
+
{
|
| 108352 |
+
"epoch": 18.991411042944787,
|
| 108353 |
+
"grad_norm": 0.24327510595321655,
|
| 108354 |
+
"learning_rate": 3.404892976909935e-07,
|
| 108355 |
+
"loss": 0.6549983024597168,
|
| 108356 |
+
"step": 15478
|
| 108357 |
+
},
|
| 108358 |
+
{
|
| 108359 |
+
"epoch": 18.992638036809815,
|
| 108360 |
+
"grad_norm": 0.2537882328033447,
|
| 108361 |
+
"learning_rate": 3.39664243524912e-07,
|
| 108362 |
+
"loss": 0.6205604672431946,
|
| 108363 |
+
"step": 15479
|
| 108364 |
+
},
|
| 108365 |
+
{
|
| 108366 |
+
"epoch": 18.993865030674847,
|
| 108367 |
+
"grad_norm": 0.22979171574115753,
|
| 108368 |
+
"learning_rate": 3.3884018334663093e-07,
|
| 108369 |
+
"loss": 0.5027973055839539,
|
| 108370 |
+
"step": 15480
|
| 108371 |
+
},
|
| 108372 |
+
{
|
| 108373 |
+
"epoch": 18.99509202453988,
|
| 108374 |
+
"grad_norm": 0.25540807843208313,
|
| 108375 |
+
"learning_rate": 3.3801711718936536e-07,
|
| 108376 |
+
"loss": 0.5741037726402283,
|
| 108377 |
+
"step": 15481
|
| 108378 |
+
},
|
| 108379 |
+
{
|
| 108380 |
+
"epoch": 18.996319018404908,
|
| 108381 |
+
"grad_norm": 0.24836954474449158,
|
| 108382 |
+
"learning_rate": 3.3719504508629154e-07,
|
| 108383 |
+
"loss": 0.5238505601882935,
|
| 108384 |
+
"step": 15482
|
| 108385 |
+
},
|
| 108386 |
+
{
|
| 108387 |
+
"epoch": 18.99754601226994,
|
| 108388 |
+
"grad_norm": 0.2540181577205658,
|
| 108389 |
+
"learning_rate": 3.363739670705468e-07,
|
| 108390 |
+
"loss": 0.5237050652503967,
|
| 108391 |
+
"step": 15483
|
| 108392 |
+
},
|
| 108393 |
+
{
|
| 108394 |
+
"epoch": 18.998773006134968,
|
| 108395 |
+
"grad_norm": 0.23090288043022156,
|
| 108396 |
+
"learning_rate": 3.355538831752242e-07,
|
| 108397 |
+
"loss": 0.553159236907959,
|
| 108398 |
+
"step": 15484
|
| 108399 |
+
},
|
| 108400 |
+
{
|
| 108401 |
+
"epoch": 19.0,
|
| 108402 |
+
"grad_norm": 0.31852981448173523,
|
| 108403 |
+
"learning_rate": 3.347347934333778e-07,
|
| 108404 |
+
"loss": 0.6536753177642822,
|
| 108405 |
+
"step": 15485
|
| 108406 |
+
},
|
| 108407 |
+
{
|
| 108408 |
+
"epoch": 19.001226993865032,
|
| 108409 |
+
"grad_norm": 0.25920507311820984,
|
| 108410 |
+
"learning_rate": 3.339166978780256e-07,
|
| 108411 |
+
"loss": 0.6572109460830688,
|
| 108412 |
+
"step": 15486
|
| 108413 |
+
},
|
| 108414 |
+
{
|
| 108415 |
+
"epoch": 19.00245398773006,
|
| 108416 |
+
"grad_norm": 0.2949686646461487,
|
| 108417 |
+
"learning_rate": 3.3309959654214127e-07,
|
| 108418 |
+
"loss": 0.689014196395874,
|
| 108419 |
+
"step": 15487
|
| 108420 |
+
},
|
| 108421 |
+
{
|
| 108422 |
+
"epoch": 19.003680981595092,
|
| 108423 |
+
"grad_norm": 0.28092673420906067,
|
| 108424 |
+
"learning_rate": 3.322834894586596e-07,
|
| 108425 |
+
"loss": 0.6477980613708496,
|
| 108426 |
+
"step": 15488
|
| 108427 |
+
},
|
| 108428 |
+
{
|
| 108429 |
+
"epoch": 19.004907975460124,
|
| 108430 |
+
"grad_norm": 0.30857640504837036,
|
| 108431 |
+
"learning_rate": 3.3146837666047646e-07,
|
| 108432 |
+
"loss": 0.5772422552108765,
|
| 108433 |
+
"step": 15489
|
| 108434 |
+
},
|
| 108435 |
+
{
|
| 108436 |
+
"epoch": 19.006134969325153,
|
| 108437 |
+
"grad_norm": 0.2557373344898224,
|
| 108438 |
+
"learning_rate": 3.306542581804434e-07,
|
| 108439 |
+
"loss": 0.6570507884025574,
|
| 108440 |
+
"step": 15490
|
| 108441 |
+
},
|
| 108442 |
+
{
|
| 108443 |
+
"epoch": 19.007361963190185,
|
| 108444 |
+
"grad_norm": 0.2586783766746521,
|
| 108445 |
+
"learning_rate": 3.2984113405138126e-07,
|
| 108446 |
+
"loss": 0.5694788694381714,
|
| 108447 |
+
"step": 15491
|
| 108448 |
+
},
|
| 108449 |
+
{
|
| 108450 |
+
"epoch": 19.008588957055213,
|
| 108451 |
+
"grad_norm": 0.23067456483840942,
|
| 108452 |
+
"learning_rate": 3.290290043060612e-07,
|
| 108453 |
+
"loss": 0.5652076601982117,
|
| 108454 |
+
"step": 15492
|
| 108455 |
+
},
|
| 108456 |
+
{
|
| 108457 |
+
"epoch": 19.009815950920245,
|
| 108458 |
+
"grad_norm": 0.2511611580848694,
|
| 108459 |
+
"learning_rate": 3.2821786897721805e-07,
|
| 108460 |
+
"loss": 0.37193605303764343,
|
| 108461 |
+
"step": 15493
|
| 108462 |
+
},
|
| 108463 |
+
{
|
| 108464 |
+
"epoch": 19.011042944785277,
|
| 108465 |
+
"grad_norm": 0.26309314370155334,
|
| 108466 |
+
"learning_rate": 3.2740772809754517e-07,
|
| 108467 |
+
"loss": 0.5918000340461731,
|
| 108468 |
+
"step": 15494
|
| 108469 |
+
},
|
| 108470 |
+
{
|
| 108471 |
+
"epoch": 19.012269938650306,
|
| 108472 |
+
"grad_norm": 0.2930505573749542,
|
| 108473 |
+
"learning_rate": 3.265985816996997e-07,
|
| 108474 |
+
"loss": 0.7556619644165039,
|
| 108475 |
+
"step": 15495
|
| 108476 |
+
},
|
| 108477 |
+
{
|
| 108478 |
+
"epoch": 19.013496932515338,
|
| 108479 |
+
"grad_norm": 0.28340160846710205,
|
| 108480 |
+
"learning_rate": 3.2579042981629447e-07,
|
| 108481 |
+
"loss": 0.6015145182609558,
|
| 108482 |
+
"step": 15496
|
| 108483 |
+
},
|
| 108484 |
+
{
|
| 108485 |
+
"epoch": 19.01472392638037,
|
| 108486 |
+
"grad_norm": 0.28173011541366577,
|
| 108487 |
+
"learning_rate": 3.2498327247990346e-07,
|
| 108488 |
+
"loss": 0.6100156307220459,
|
| 108489 |
+
"step": 15497
|
| 108490 |
+
},
|
| 108491 |
+
{
|
| 108492 |
+
"epoch": 19.0159509202454,
|
| 108493 |
+
"grad_norm": 0.2715858519077301,
|
| 108494 |
+
"learning_rate": 3.241771097230617e-07,
|
| 108495 |
+
"loss": 0.6466595530509949,
|
| 108496 |
+
"step": 15498
|
| 108497 |
+
},
|
| 108498 |
+
{
|
| 108499 |
+
"epoch": 19.01717791411043,
|
| 108500 |
+
"grad_norm": 0.2637956440448761,
|
| 108501 |
+
"learning_rate": 3.233719415782627e-07,
|
| 108502 |
+
"loss": 0.6553301811218262,
|
| 108503 |
+
"step": 15499
|
| 108504 |
+
},
|
| 108505 |
+
{
|
| 108506 |
+
"epoch": 19.01840490797546,
|
| 108507 |
+
"grad_norm": 0.2816595733165741,
|
| 108508 |
+
"learning_rate": 3.22567768077961e-07,
|
| 108509 |
+
"loss": 0.5865865349769592,
|
| 108510 |
+
"step": 15500
|
| 108511 |
+
},
|
| 108512 |
+
{
|
| 108513 |
+
"epoch": 19.01963190184049,
|
| 108514 |
+
"grad_norm": 0.2643696665763855,
|
| 108515 |
+
"learning_rate": 3.217645892545695e-07,
|
| 108516 |
+
"loss": 0.579836905002594,
|
| 108517 |
+
"step": 15501
|
| 108518 |
+
},
|
| 108519 |
+
{
|
| 108520 |
+
"epoch": 19.020858895705523,
|
| 108521 |
+
"grad_norm": 0.24225236475467682,
|
| 108522 |
+
"learning_rate": 3.2096240514046525e-07,
|
| 108523 |
+
"loss": 0.4839652180671692,
|
| 108524 |
+
"step": 15502
|
| 108525 |
+
},
|
| 108526 |
+
{
|
| 108527 |
+
"epoch": 19.02208588957055,
|
| 108528 |
+
"grad_norm": 0.26465895771980286,
|
| 108529 |
+
"learning_rate": 3.2016121576797787e-07,
|
| 108530 |
+
"loss": 0.4135524034500122,
|
| 108531 |
+
"step": 15503
|
| 108532 |
+
},
|
| 108533 |
+
{
|
| 108534 |
+
"epoch": 19.023312883435583,
|
| 108535 |
+
"grad_norm": 0.2830473482608795,
|
| 108536 |
+
"learning_rate": 3.193610211694037e-07,
|
| 108537 |
+
"loss": 0.6942921876907349,
|
| 108538 |
+
"step": 15504
|
| 108539 |
+
},
|
| 108540 |
+
{
|
| 108541 |
+
"epoch": 19.024539877300615,
|
| 108542 |
+
"grad_norm": 0.3100355267524719,
|
| 108543 |
+
"learning_rate": 3.1856182137699473e-07,
|
| 108544 |
+
"loss": 0.4198477864265442,
|
| 108545 |
+
"step": 15505
|
| 108546 |
+
},
|
| 108547 |
+
{
|
| 108548 |
+
"epoch": 19.025766871165644,
|
| 108549 |
+
"grad_norm": 0.26637738943099976,
|
| 108550 |
+
"learning_rate": 3.1776361642296415e-07,
|
| 108551 |
+
"loss": 0.8694722652435303,
|
| 108552 |
+
"step": 15506
|
| 108553 |
+
},
|
| 108554 |
+
{
|
| 108555 |
+
"epoch": 19.026993865030676,
|
| 108556 |
+
"grad_norm": 0.25420281291007996,
|
| 108557 |
+
"learning_rate": 3.1696640633948895e-07,
|
| 108558 |
+
"loss": 0.5303689241409302,
|
| 108559 |
+
"step": 15507
|
| 108560 |
+
},
|
| 108561 |
+
{
|
| 108562 |
+
"epoch": 19.028220858895704,
|
| 108563 |
+
"grad_norm": 0.28541603684425354,
|
| 108564 |
+
"learning_rate": 3.161701911586962e-07,
|
| 108565 |
+
"loss": 0.682641863822937,
|
| 108566 |
+
"step": 15508
|
| 108567 |
+
},
|
| 108568 |
+
{
|
| 108569 |
+
"epoch": 19.029447852760736,
|
| 108570 |
+
"grad_norm": 0.2848469913005829,
|
| 108571 |
+
"learning_rate": 3.153749709126852e-07,
|
| 108572 |
+
"loss": 0.5118822455406189,
|
| 108573 |
+
"step": 15509
|
| 108574 |
+
},
|
| 108575 |
+
{
|
| 108576 |
+
"epoch": 19.030674846625768,
|
| 108577 |
+
"grad_norm": 0.24559995532035828,
|
| 108578 |
+
"learning_rate": 3.1458074563350537e-07,
|
| 108579 |
+
"loss": 0.48838451504707336,
|
| 108580 |
+
"step": 15510
|
| 108581 |
+
},
|
| 108582 |
+
{
|
| 108583 |
+
"epoch": 19.031901840490796,
|
| 108584 |
+
"grad_norm": 0.2584964632987976,
|
| 108585 |
+
"learning_rate": 3.1378751535316996e-07,
|
| 108586 |
+
"loss": 0.5585110187530518,
|
| 108587 |
+
"step": 15511
|
| 108588 |
+
},
|
| 108589 |
+
{
|
| 108590 |
+
"epoch": 19.03312883435583,
|
| 108591 |
+
"grad_norm": 0.26058393716812134,
|
| 108592 |
+
"learning_rate": 3.129952801036534e-07,
|
| 108593 |
+
"loss": 0.5690903663635254,
|
| 108594 |
+
"step": 15512
|
| 108595 |
+
},
|
| 108596 |
+
{
|
| 108597 |
+
"epoch": 19.03435582822086,
|
| 108598 |
+
"grad_norm": 0.25363609194755554,
|
| 108599 |
+
"learning_rate": 3.1220403991688573e-07,
|
| 108600 |
+
"loss": 0.40863513946533203,
|
| 108601 |
+
"step": 15513
|
| 108602 |
+
},
|
| 108603 |
+
{
|
| 108604 |
+
"epoch": 19.03558282208589,
|
| 108605 |
+
"grad_norm": 0.26774558424949646,
|
| 108606 |
+
"learning_rate": 3.114137948247636e-07,
|
| 108607 |
+
"loss": 0.5699991583824158,
|
| 108608 |
+
"step": 15514
|
| 108609 |
+
},
|
| 108610 |
+
{
|
| 108611 |
+
"epoch": 19.03680981595092,
|
| 108612 |
+
"grad_norm": 0.2533966898918152,
|
| 108613 |
+
"learning_rate": 3.106245448591366e-07,
|
| 108614 |
+
"loss": 0.5713862180709839,
|
| 108615 |
+
"step": 15515
|
| 108616 |
+
},
|
| 108617 |
+
{
|
| 108618 |
+
"epoch": 19.03803680981595,
|
| 108619 |
+
"grad_norm": 0.277849406003952,
|
| 108620 |
+
"learning_rate": 3.098362900518209e-07,
|
| 108621 |
+
"loss": 0.5396018028259277,
|
| 108622 |
+
"step": 15516
|
| 108623 |
+
},
|
| 108624 |
+
{
|
| 108625 |
+
"epoch": 19.03926380368098,
|
| 108626 |
+
"grad_norm": 0.2900213599205017,
|
| 108627 |
+
"learning_rate": 3.0904903043458275e-07,
|
| 108628 |
+
"loss": 0.8321250677108765,
|
| 108629 |
+
"step": 15517
|
| 108630 |
+
},
|
| 108631 |
+
{
|
| 108632 |
+
"epoch": 19.040490797546013,
|
| 108633 |
+
"grad_norm": 0.2735212445259094,
|
| 108634 |
+
"learning_rate": 3.0826276603915517e-07,
|
| 108635 |
+
"loss": 0.607227087020874,
|
| 108636 |
+
"step": 15518
|
| 108637 |
+
},
|
| 108638 |
+
{
|
| 108639 |
+
"epoch": 19.041717791411042,
|
| 108640 |
+
"grad_norm": 0.2797504961490631,
|
| 108641 |
+
"learning_rate": 3.074774968972349e-07,
|
| 108642 |
+
"loss": 0.6368119716644287,
|
| 108643 |
+
"step": 15519
|
| 108644 |
+
},
|
| 108645 |
+
{
|
| 108646 |
+
"epoch": 19.042944785276074,
|
| 108647 |
+
"grad_norm": 0.26525843143463135,
|
| 108648 |
+
"learning_rate": 3.0669322304047176e-07,
|
| 108649 |
+
"loss": 0.6011868715286255,
|
| 108650 |
+
"step": 15520
|
| 108651 |
+
},
|
| 108652 |
+
{
|
| 108653 |
+
"epoch": 19.044171779141106,
|
| 108654 |
+
"grad_norm": 0.3154607117176056,
|
| 108655 |
+
"learning_rate": 3.059099445004793e-07,
|
| 108656 |
+
"loss": 0.6508238315582275,
|
| 108657 |
+
"step": 15521
|
| 108658 |
+
},
|
| 108659 |
+
{
|
| 108660 |
+
"epoch": 19.045398773006134,
|
| 108661 |
+
"grad_norm": 0.2825962007045746,
|
| 108662 |
+
"learning_rate": 3.051276613088239e-07,
|
| 108663 |
+
"loss": 0.6377282738685608,
|
| 108664 |
+
"step": 15522
|
| 108665 |
+
},
|
| 108666 |
+
{
|
| 108667 |
+
"epoch": 19.046625766871166,
|
| 108668 |
+
"grad_norm": 0.25757256150245667,
|
| 108669 |
+
"learning_rate": 3.0434637349704144e-07,
|
| 108670 |
+
"loss": 0.5409518480300903,
|
| 108671 |
+
"step": 15523
|
| 108672 |
+
},
|
| 108673 |
+
{
|
| 108674 |
+
"epoch": 19.047852760736195,
|
| 108675 |
+
"grad_norm": 0.2591853141784668,
|
| 108676 |
+
"learning_rate": 3.035660810966234e-07,
|
| 108677 |
+
"loss": 0.6383548974990845,
|
| 108678 |
+
"step": 15524
|
| 108679 |
+
},
|
| 108680 |
+
{
|
| 108681 |
+
"epoch": 19.049079754601227,
|
| 108682 |
+
"grad_norm": 0.27247244119644165,
|
| 108683 |
+
"learning_rate": 3.027867841390197e-07,
|
| 108684 |
+
"loss": 0.7271251678466797,
|
| 108685 |
+
"step": 15525
|
| 108686 |
+
},
|
| 108687 |
+
{
|
| 108688 |
+
"epoch": 19.05030674846626,
|
| 108689 |
+
"grad_norm": 0.23963193595409393,
|
| 108690 |
+
"learning_rate": 3.020084826556413e-07,
|
| 108691 |
+
"loss": 0.3522301912307739,
|
| 108692 |
+
"step": 15526
|
| 108693 |
+
},
|
| 108694 |
+
{
|
| 108695 |
+
"epoch": 19.051533742331287,
|
| 108696 |
+
"grad_norm": 0.24897794425487518,
|
| 108697 |
+
"learning_rate": 3.012311766778603e-07,
|
| 108698 |
+
"loss": 0.5510503053665161,
|
| 108699 |
+
"step": 15527
|
| 108700 |
+
},
|
| 108701 |
+
{
|
| 108702 |
+
"epoch": 19.05276073619632,
|
| 108703 |
+
"grad_norm": 0.2757461369037628,
|
| 108704 |
+
"learning_rate": 3.004548662370071e-07,
|
| 108705 |
+
"loss": 0.4541953206062317,
|
| 108706 |
+
"step": 15528
|
| 108707 |
+
},
|
| 108708 |
+
{
|
| 108709 |
+
"epoch": 19.05398773006135,
|
| 108710 |
+
"grad_norm": 0.28025543689727783,
|
| 108711 |
+
"learning_rate": 2.996795513643735e-07,
|
| 108712 |
+
"loss": 0.5072070360183716,
|
| 108713 |
+
"step": 15529
|
| 108714 |
+
},
|
| 108715 |
+
{
|
| 108716 |
+
"epoch": 19.05521472392638,
|
| 108717 |
+
"grad_norm": 0.25767865777015686,
|
| 108718 |
+
"learning_rate": 2.9890523209120944e-07,
|
| 108719 |
+
"loss": 0.5460700988769531,
|
| 108720 |
+
"step": 15530
|
| 108721 |
+
},
|
| 108722 |
+
{
|
| 108723 |
+
"epoch": 19.05644171779141,
|
| 108724 |
+
"grad_norm": 0.3000355064868927,
|
| 108725 |
+
"learning_rate": 2.9813190844872605e-07,
|
| 108726 |
+
"loss": 0.6726352572441101,
|
| 108727 |
+
"step": 15531
|
| 108728 |
+
},
|
| 108729 |
+
{
|
| 108730 |
+
"epoch": 19.05766871165644,
|
| 108731 |
+
"grad_norm": 0.2516064941883087,
|
| 108732 |
+
"learning_rate": 2.9735958046809563e-07,
|
| 108733 |
+
"loss": 0.3925473093986511,
|
| 108734 |
+
"step": 15532
|
| 108735 |
+
},
|
| 108736 |
+
{
|
| 108737 |
+
"epoch": 19.058895705521472,
|
| 108738 |
+
"grad_norm": 0.25730621814727783,
|
| 108739 |
+
"learning_rate": 2.965882481804433e-07,
|
| 108740 |
+
"loss": 0.3751104474067688,
|
| 108741 |
+
"step": 15533
|
| 108742 |
+
},
|
| 108743 |
+
{
|
| 108744 |
+
"epoch": 19.060122699386504,
|
| 108745 |
+
"grad_norm": 0.26689547300338745,
|
| 108746 |
+
"learning_rate": 2.9581791161686355e-07,
|
| 108747 |
+
"loss": 0.5218518972396851,
|
| 108748 |
+
"step": 15534
|
| 108749 |
+
},
|
| 108750 |
+
{
|
| 108751 |
+
"epoch": 19.061349693251532,
|
| 108752 |
+
"grad_norm": 0.2744821012020111,
|
| 108753 |
+
"learning_rate": 2.950485708084039e-07,
|
| 108754 |
+
"loss": 0.5624144077301025,
|
| 108755 |
+
"step": 15535
|
| 108756 |
+
},
|
| 108757 |
+
{
|
| 108758 |
+
"epoch": 19.062576687116565,
|
| 108759 |
+
"grad_norm": 0.2628687024116516,
|
| 108760 |
+
"learning_rate": 2.9428022578607827e-07,
|
| 108761 |
+
"loss": 0.614841103553772,
|
| 108762 |
+
"step": 15536
|
| 108763 |
+
},
|
| 108764 |
+
{
|
| 108765 |
+
"epoch": 19.063803680981597,
|
| 108766 |
+
"grad_norm": 0.2565420866012573,
|
| 108767 |
+
"learning_rate": 2.9351287658085093e-07,
|
| 108768 |
+
"loss": 0.5569916367530823,
|
| 108769 |
+
"step": 15537
|
| 108770 |
+
},
|
| 108771 |
+
{
|
| 108772 |
+
"epoch": 19.065030674846625,
|
| 108773 |
+
"grad_norm": 0.24239133298397064,
|
| 108774 |
+
"learning_rate": 2.9274652322365535e-07,
|
| 108775 |
+
"loss": 0.42598956823349,
|
| 108776 |
+
"step": 15538
|
| 108777 |
+
},
|
| 108778 |
+
{
|
| 108779 |
+
"epoch": 19.066257668711657,
|
| 108780 |
+
"grad_norm": 0.25568854808807373,
|
| 108781 |
+
"learning_rate": 2.9198116574538083e-07,
|
| 108782 |
+
"loss": 0.6859689354896545,
|
| 108783 |
+
"step": 15539
|
| 108784 |
+
},
|
| 108785 |
+
{
|
| 108786 |
+
"epoch": 19.067484662576685,
|
| 108787 |
+
"grad_norm": 0.26058533787727356,
|
| 108788 |
+
"learning_rate": 2.912168041768748e-07,
|
| 108789 |
+
"loss": 0.6602602005004883,
|
| 108790 |
+
"step": 15540
|
| 108791 |
+
},
|
| 108792 |
+
{
|
| 108793 |
+
"epoch": 19.068711656441717,
|
| 108794 |
+
"grad_norm": 0.23533086478710175,
|
| 108795 |
+
"learning_rate": 2.9045343854895156e-07,
|
| 108796 |
+
"loss": 0.4718015491962433,
|
| 108797 |
+
"step": 15541
|
| 108798 |
+
},
|
| 108799 |
+
{
|
| 108800 |
+
"epoch": 19.06993865030675,
|
| 108801 |
+
"grad_norm": 0.26327815651893616,
|
| 108802 |
+
"learning_rate": 2.896910688923726e-07,
|
| 108803 |
+
"loss": 0.4511849880218506,
|
| 108804 |
+
"step": 15542
|
| 108805 |
+
},
|
| 108806 |
+
{
|
| 108807 |
+
"epoch": 19.071165644171778,
|
| 108808 |
+
"grad_norm": 0.29071781039237976,
|
| 108809 |
+
"learning_rate": 2.889296952378717e-07,
|
| 108810 |
+
"loss": 0.6808133125305176,
|
| 108811 |
+
"step": 15543
|
| 108812 |
+
},
|
| 108813 |
+
{
|
| 108814 |
+
"epoch": 19.07239263803681,
|
| 108815 |
+
"grad_norm": 0.2691454291343689,
|
| 108816 |
+
"learning_rate": 2.881693176161354e-07,
|
| 108817 |
+
"loss": 0.693962574005127,
|
| 108818 |
+
"step": 15544
|
| 108819 |
+
},
|
| 108820 |
+
{
|
| 108821 |
+
"epoch": 19.073619631901842,
|
| 108822 |
+
"grad_norm": 0.22548571228981018,
|
| 108823 |
+
"learning_rate": 2.8740993605781416e-07,
|
| 108824 |
+
"loss": 0.4249667525291443,
|
| 108825 |
+
"step": 15545
|
| 108826 |
+
},
|
| 108827 |
+
{
|
| 108828 |
+
"epoch": 19.07484662576687,
|
| 108829 |
+
"grad_norm": 0.2482619732618332,
|
| 108830 |
+
"learning_rate": 2.866515505935169e-07,
|
| 108831 |
+
"loss": 0.6845868825912476,
|
| 108832 |
+
"step": 15546
|
| 108833 |
+
},
|
| 108834 |
+
{
|
| 108835 |
+
"epoch": 19.076073619631902,
|
| 108836 |
+
"grad_norm": 0.3004922568798065,
|
| 108837 |
+
"learning_rate": 2.8589416125381076e-07,
|
| 108838 |
+
"loss": 0.4892275333404541,
|
| 108839 |
+
"step": 15547
|
| 108840 |
+
},
|
| 108841 |
+
{
|
| 108842 |
+
"epoch": 19.07730061349693,
|
| 108843 |
+
"grad_norm": 0.2762199640274048,
|
| 108844 |
+
"learning_rate": 2.851377680692241e-07,
|
| 108845 |
+
"loss": 0.585484504699707,
|
| 108846 |
+
"step": 15548
|
| 108847 |
+
},
|
| 108848 |
+
{
|
| 108849 |
+
"epoch": 19.078527607361963,
|
| 108850 |
+
"grad_norm": 0.2855587899684906,
|
| 108851 |
+
"learning_rate": 2.843823710702437e-07,
|
| 108852 |
+
"loss": 0.4709779918193817,
|
| 108853 |
+
"step": 15549
|
| 108854 |
+
},
|
| 108855 |
+
{
|
| 108856 |
+
"epoch": 19.079754601226995,
|
| 108857 |
+
"grad_norm": 0.2726927697658539,
|
| 108858 |
+
"learning_rate": 2.8362797028732014e-07,
|
| 108859 |
+
"loss": 0.6216049790382385,
|
| 108860 |
+
"step": 15550
|
| 108861 |
+
},
|
| 108862 |
+
{
|
| 108863 |
+
"epoch": 19.080981595092023,
|
| 108864 |
+
"grad_norm": 0.3027653098106384,
|
| 108865 |
+
"learning_rate": 2.828745657508597e-07,
|
| 108866 |
+
"loss": 0.6423373818397522,
|
| 108867 |
+
"step": 15551
|
| 108868 |
+
},
|
| 108869 |
+
{
|
| 108870 |
+
"epoch": 19.082208588957055,
|
| 108871 |
+
"grad_norm": 0.24247443675994873,
|
| 108872 |
+
"learning_rate": 2.8212215749122975e-07,
|
| 108873 |
+
"loss": 0.6281909942626953,
|
| 108874 |
+
"step": 15552
|
| 108875 |
+
},
|
| 108876 |
+
{
|
| 108877 |
+
"epoch": 19.083435582822087,
|
| 108878 |
+
"grad_norm": 0.2667468190193176,
|
| 108879 |
+
"learning_rate": 2.8137074553875873e-07,
|
| 108880 |
+
"loss": 0.7013019919395447,
|
| 108881 |
+
"step": 15553
|
| 108882 |
+
},
|
| 108883 |
+
{
|
| 108884 |
+
"epoch": 19.084662576687116,
|
| 108885 |
+
"grad_norm": 0.24377913773059845,
|
| 108886 |
+
"learning_rate": 2.806203299237309e-07,
|
| 108887 |
+
"loss": 0.5487319827079773,
|
| 108888 |
+
"step": 15554
|
| 108889 |
+
},
|
| 108890 |
+
{
|
| 108891 |
+
"epoch": 19.085889570552148,
|
| 108892 |
+
"grad_norm": 0.25317683815956116,
|
| 108893 |
+
"learning_rate": 2.7987091067639694e-07,
|
| 108894 |
+
"loss": 0.6231718063354492,
|
| 108895 |
+
"step": 15555
|
| 108896 |
+
},
|
| 108897 |
+
{
|
| 108898 |
+
"epoch": 19.08711656441718,
|
| 108899 |
+
"grad_norm": 0.26848191022872925,
|
| 108900 |
+
"learning_rate": 2.7912248782696604e-07,
|
| 108901 |
+
"loss": 0.522138237953186,
|
| 108902 |
+
"step": 15556
|
| 108903 |
+
},
|
| 108904 |
+
{
|
| 108905 |
+
"epoch": 19.088343558282208,
|
| 108906 |
+
"grad_norm": 0.25168463587760925,
|
| 108907 |
+
"learning_rate": 2.7837506140559745e-07,
|
| 108908 |
+
"loss": 0.47587889432907104,
|
| 108909 |
+
"step": 15557
|
| 108910 |
+
},
|
| 108911 |
+
{
|
| 108912 |
+
"epoch": 19.08957055214724,
|
| 108913 |
+
"grad_norm": 0.31205639243125916,
|
| 108914 |
+
"learning_rate": 2.7762863144242256e-07,
|
| 108915 |
+
"loss": 0.621688961982727,
|
| 108916 |
+
"step": 15558
|
| 108917 |
+
},
|
| 108918 |
+
{
|
| 108919 |
+
"epoch": 19.09079754601227,
|
| 108920 |
+
"grad_norm": 0.2645886540412903,
|
| 108921 |
+
"learning_rate": 2.768831979675257e-07,
|
| 108922 |
+
"loss": 0.7117650508880615,
|
| 108923 |
+
"step": 15559
|
| 108924 |
+
},
|
| 108925 |
+
{
|
| 108926 |
+
"epoch": 19.0920245398773,
|
| 108927 |
+
"grad_norm": 0.23457367718219757,
|
| 108928 |
+
"learning_rate": 2.761387610109578e-07,
|
| 108929 |
+
"loss": 0.39615631103515625,
|
| 108930 |
+
"step": 15560
|
| 108931 |
+
},
|
| 108932 |
+
{
|
| 108933 |
+
"epoch": 19.093251533742333,
|
| 108934 |
+
"grad_norm": 0.25403016805648804,
|
| 108935 |
+
"learning_rate": 2.753953206027199e-07,
|
| 108936 |
+
"loss": 0.669093906879425,
|
| 108937 |
+
"step": 15561
|
| 108938 |
+
},
|
| 108939 |
+
{
|
| 108940 |
+
"epoch": 19.09447852760736,
|
| 108941 |
+
"grad_norm": 0.26722466945648193,
|
| 108942 |
+
"learning_rate": 2.746528767727796e-07,
|
| 108943 |
+
"loss": 0.6310396194458008,
|
| 108944 |
+
"step": 15562
|
| 108945 |
+
},
|
| 108946 |
+
{
|
| 108947 |
+
"epoch": 19.095705521472393,
|
| 108948 |
+
"grad_norm": 0.2863602936267853,
|
| 108949 |
+
"learning_rate": 2.7391142955106306e-07,
|
| 108950 |
+
"loss": 0.7216989994049072,
|
| 108951 |
+
"step": 15563
|
| 108952 |
+
},
|
| 108953 |
+
{
|
| 108954 |
+
"epoch": 19.096932515337425,
|
| 108955 |
+
"grad_norm": 0.2409089207649231,
|
| 108956 |
+
"learning_rate": 2.731709789674575e-07,
|
| 108957 |
+
"loss": 0.5503473281860352,
|
| 108958 |
+
"step": 15564
|
| 108959 |
+
},
|
| 108960 |
+
{
|
| 108961 |
+
"epoch": 19.098159509202453,
|
| 108962 |
+
"grad_norm": 0.2542863190174103,
|
| 108963 |
+
"learning_rate": 2.724315250518056e-07,
|
| 108964 |
+
"loss": 0.4675602912902832,
|
| 108965 |
+
"step": 15565
|
| 108966 |
+
},
|
| 108967 |
+
{
|
| 108968 |
+
"epoch": 19.099386503067485,
|
| 108969 |
+
"grad_norm": 0.3128001093864441,
|
| 108970 |
+
"learning_rate": 2.7169306783391413e-07,
|
| 108971 |
+
"loss": 0.5894114375114441,
|
| 108972 |
+
"step": 15566
|
| 108973 |
+
},
|
| 108974 |
+
{
|
| 108975 |
+
"epoch": 19.100613496932514,
|
| 108976 |
+
"grad_norm": 0.27291339635849,
|
| 108977 |
+
"learning_rate": 2.709556073435482e-07,
|
| 108978 |
+
"loss": 0.49833545088768005,
|
| 108979 |
+
"step": 15567
|
| 108980 |
+
},
|
| 108981 |
+
{
|
| 108982 |
+
"epoch": 19.101840490797546,
|
| 108983 |
+
"grad_norm": 0.2911783754825592,
|
| 108984 |
+
"learning_rate": 2.7021914361042844e-07,
|
| 108985 |
+
"loss": 0.8547276854515076,
|
| 108986 |
+
"step": 15568
|
| 108987 |
+
},
|
| 108988 |
+
{
|
| 108989 |
+
"epoch": 19.103067484662578,
|
| 108990 |
+
"grad_norm": 0.26530611515045166,
|
| 108991 |
+
"learning_rate": 2.694836766642478e-07,
|
| 108992 |
+
"loss": 0.5670586824417114,
|
| 108993 |
+
"step": 15569
|
| 108994 |
+
},
|
| 108995 |
+
{
|
| 108996 |
+
"epoch": 19.104294478527606,
|
| 108997 |
+
"grad_norm": 0.26550209522247314,
|
| 108998 |
+
"learning_rate": 2.687492065346464e-07,
|
| 108999 |
+
"loss": 0.6865995526313782,
|
| 109000 |
+
"step": 15570
|
| 109001 |
+
},
|
| 109002 |
+
{
|
| 109003 |
+
"epoch": 19.10552147239264,
|
| 109004 |
+
"grad_norm": 0.27784350514411926,
|
| 109005 |
+
"learning_rate": 2.6801573325122565e-07,
|
| 109006 |
+
"loss": 0.595111608505249,
|
| 109007 |
+
"step": 15571
|
| 109008 |
+
},
|
| 109009 |
+
{
|
| 109010 |
+
"epoch": 19.10674846625767,
|
| 109011 |
+
"grad_norm": 0.25161775946617126,
|
| 109012 |
+
"learning_rate": 2.6728325684355625e-07,
|
| 109013 |
+
"loss": 0.5138437747955322,
|
| 109014 |
+
"step": 15572
|
| 109015 |
+
},
|
| 109016 |
+
{
|
| 109017 |
+
"epoch": 19.1079754601227,
|
| 109018 |
+
"grad_norm": 0.2635716199874878,
|
| 109019 |
+
"learning_rate": 2.6655177734115634e-07,
|
| 109020 |
+
"loss": 0.48764345049858093,
|
| 109021 |
+
"step": 15573
|
| 109022 |
+
},
|
| 109023 |
+
{
|
| 109024 |
+
"epoch": 19.10920245398773,
|
| 109025 |
+
"grad_norm": 0.25207483768463135,
|
| 109026 |
+
"learning_rate": 2.6582129477351336e-07,
|
| 109027 |
+
"loss": 0.30982908606529236,
|
| 109028 |
+
"step": 15574
|
| 109029 |
+
},
|
| 109030 |
+
{
|
| 109031 |
+
"epoch": 19.11042944785276,
|
| 109032 |
+
"grad_norm": 0.29243355989456177,
|
| 109033 |
+
"learning_rate": 2.650918091700705e-07,
|
| 109034 |
+
"loss": 0.7353715300559998,
|
| 109035 |
+
"step": 15575
|
| 109036 |
+
},
|
| 109037 |
+
{
|
| 109038 |
+
"epoch": 19.11165644171779,
|
| 109039 |
+
"grad_norm": 0.2807004153728485,
|
| 109040 |
+
"learning_rate": 2.6436332056022917e-07,
|
| 109041 |
+
"loss": 0.580844521522522,
|
| 109042 |
+
"step": 15576
|
| 109043 |
+
},
|
| 109044 |
+
{
|
| 109045 |
+
"epoch": 19.112883435582823,
|
| 109046 |
+
"grad_norm": 0.25005725026130676,
|
| 109047 |
+
"learning_rate": 2.6363582897335484e-07,
|
| 109048 |
+
"loss": 0.4646947681903839,
|
| 109049 |
+
"step": 15577
|
| 109050 |
+
},
|
| 109051 |
+
{
|
| 109052 |
+
"epoch": 19.11411042944785,
|
| 109053 |
+
"grad_norm": 0.24425868690013885,
|
| 109054 |
+
"learning_rate": 2.629093344387684e-07,
|
| 109055 |
+
"loss": 0.7046242356300354,
|
| 109056 |
+
"step": 15578
|
| 109057 |
+
},
|
| 109058 |
+
{
|
| 109059 |
+
"epoch": 19.115337423312884,
|
| 109060 |
+
"grad_norm": 0.26282840967178345,
|
| 109061 |
+
"learning_rate": 2.6218383698575765e-07,
|
| 109062 |
+
"loss": 0.6420117616653442,
|
| 109063 |
+
"step": 15579
|
| 109064 |
+
},
|
| 109065 |
+
{
|
| 109066 |
+
"epoch": 19.116564417177916,
|
| 109067 |
+
"grad_norm": 0.272282212972641,
|
| 109068 |
+
"learning_rate": 2.6145933664355746e-07,
|
| 109069 |
+
"loss": 0.6844086647033691,
|
| 109070 |
+
"step": 15580
|
| 109071 |
+
},
|
| 109072 |
+
{
|
| 109073 |
+
"epoch": 19.117791411042944,
|
| 109074 |
+
"grad_norm": 0.2749355435371399,
|
| 109075 |
+
"learning_rate": 2.6073583344137787e-07,
|
| 109076 |
+
"loss": 0.5719791650772095,
|
| 109077 |
+
"step": 15581
|
| 109078 |
+
},
|
| 109079 |
+
{
|
| 109080 |
+
"epoch": 19.119018404907976,
|
| 109081 |
+
"grad_norm": 0.28105154633522034,
|
| 109082 |
+
"learning_rate": 2.600133274083788e-07,
|
| 109083 |
+
"loss": 0.7059277296066284,
|
| 109084 |
+
"step": 15582
|
| 109085 |
+
},
|
| 109086 |
+
{
|
| 109087 |
+
"epoch": 19.120245398773005,
|
| 109088 |
+
"grad_norm": 0.2757807970046997,
|
| 109089 |
+
"learning_rate": 2.5929181857368146e-07,
|
| 109090 |
+
"loss": 0.5308383703231812,
|
| 109091 |
+
"step": 15583
|
| 109092 |
+
},
|
| 109093 |
+
{
|
| 109094 |
+
"epoch": 19.121472392638037,
|
| 109095 |
+
"grad_norm": 0.2569369971752167,
|
| 109096 |
+
"learning_rate": 2.5857130696636813e-07,
|
| 109097 |
+
"loss": 0.5078203678131104,
|
| 109098 |
+
"step": 15584
|
| 109099 |
+
},
|
| 109100 |
+
{
|
| 109101 |
+
"epoch": 19.12269938650307,
|
| 109102 |
+
"grad_norm": 0.27016228437423706,
|
| 109103 |
+
"learning_rate": 2.5785179261547943e-07,
|
| 109104 |
+
"loss": 0.5279009342193604,
|
| 109105 |
+
"step": 15585
|
| 109106 |
+
},
|
| 109107 |
+
{
|
| 109108 |
+
"epoch": 19.123926380368097,
|
| 109109 |
+
"grad_norm": 0.2662789821624756,
|
| 109110 |
+
"learning_rate": 2.5713327555002e-07,
|
| 109111 |
+
"loss": 0.657353401184082,
|
| 109112 |
+
"step": 15586
|
| 109113 |
+
},
|
| 109114 |
+
{
|
| 109115 |
+
"epoch": 19.12515337423313,
|
| 109116 |
+
"grad_norm": 0.24857038259506226,
|
| 109117 |
+
"learning_rate": 2.564157557989472e-07,
|
| 109118 |
+
"loss": 0.6883531808853149,
|
| 109119 |
+
"step": 15587
|
| 109120 |
+
},
|
| 109121 |
+
{
|
| 109122 |
+
"epoch": 19.12638036809816,
|
| 109123 |
+
"grad_norm": 0.24477167427539825,
|
| 109124 |
+
"learning_rate": 2.556992333911851e-07,
|
| 109125 |
+
"loss": 0.4771386981010437,
|
| 109126 |
+
"step": 15588
|
| 109127 |
+
},
|
| 109128 |
+
{
|
| 109129 |
+
"epoch": 19.12760736196319,
|
| 109130 |
+
"grad_norm": 0.272159218788147,
|
| 109131 |
+
"learning_rate": 2.549837083556161e-07,
|
| 109132 |
+
"loss": 0.7366246581077576,
|
| 109133 |
+
"step": 15589
|
| 109134 |
+
},
|
| 109135 |
+
{
|
| 109136 |
+
"epoch": 19.12883435582822,
|
| 109137 |
+
"grad_norm": 0.2629441022872925,
|
| 109138 |
+
"learning_rate": 2.5426918072107566e-07,
|
| 109139 |
+
"loss": 0.47775810956954956,
|
| 109140 |
+
"step": 15590
|
| 109141 |
+
},
|
| 109142 |
+
{
|
| 109143 |
+
"epoch": 19.13006134969325,
|
| 109144 |
+
"grad_norm": 0.29105567932128906,
|
| 109145 |
+
"learning_rate": 2.5355565051636833e-07,
|
| 109146 |
+
"loss": 0.5888192653656006,
|
| 109147 |
+
"step": 15591
|
| 109148 |
+
},
|
| 109149 |
+
{
|
| 109150 |
+
"epoch": 19.131288343558282,
|
| 109151 |
+
"grad_norm": 0.2917846441268921,
|
| 109152 |
+
"learning_rate": 2.528431177702545e-07,
|
| 109153 |
+
"loss": 0.7695599794387817,
|
| 109154 |
+
"step": 15592
|
| 109155 |
+
},
|
| 109156 |
+
{
|
| 109157 |
+
"epoch": 19.132515337423314,
|
| 109158 |
+
"grad_norm": 0.2750089466571808,
|
| 109159 |
+
"learning_rate": 2.521315825114529e-07,
|
| 109160 |
+
"loss": 0.5887625217437744,
|
| 109161 |
+
"step": 15593
|
| 109162 |
+
},
|
| 109163 |
+
{
|
| 109164 |
+
"epoch": 19.133742331288342,
|
| 109165 |
+
"grad_norm": 0.23905760049819946,
|
| 109166 |
+
"learning_rate": 2.51421044768646e-07,
|
| 109167 |
+
"loss": 0.6818940043449402,
|
| 109168 |
+
"step": 15594
|
| 109169 |
+
},
|
| 109170 |
+
{
|
| 109171 |
+
"epoch": 19.134969325153374,
|
| 109172 |
+
"grad_norm": 0.2392745018005371,
|
| 109173 |
+
"learning_rate": 2.5071150457046944e-07,
|
| 109174 |
+
"loss": 0.4888817071914673,
|
| 109175 |
+
"step": 15595
|
| 109176 |
+
},
|
| 109177 |
+
{
|
| 109178 |
+
"epoch": 19.136196319018406,
|
| 109179 |
+
"grad_norm": 0.25896742939949036,
|
| 109180 |
+
"learning_rate": 2.500029619455252e-07,
|
| 109181 |
+
"loss": 0.6466255187988281,
|
| 109182 |
+
"step": 15596
|
| 109183 |
+
},
|
| 109184 |
+
{
|
| 109185 |
+
"epoch": 19.137423312883435,
|
| 109186 |
+
"grad_norm": 0.26890119910240173,
|
| 109187 |
+
"learning_rate": 2.492954169223738e-07,
|
| 109188 |
+
"loss": 0.4519672393798828,
|
| 109189 |
+
"step": 15597
|
| 109190 |
+
},
|
| 109191 |
+
{
|
| 109192 |
+
"epoch": 19.138650306748467,
|
| 109193 |
+
"grad_norm": 0.2772124707698822,
|
| 109194 |
+
"learning_rate": 2.4858886952953133e-07,
|
| 109195 |
+
"loss": 0.6026498079299927,
|
| 109196 |
+
"step": 15598
|
| 109197 |
+
},
|
| 109198 |
+
{
|
| 109199 |
+
"epoch": 19.139877300613495,
|
| 109200 |
+
"grad_norm": 0.24094004929065704,
|
| 109201 |
+
"learning_rate": 2.478833197954805e-07,
|
| 109202 |
+
"loss": 0.44715774059295654,
|
| 109203 |
+
"step": 15599
|
| 109204 |
+
},
|
| 109205 |
+
{
|
| 109206 |
+
"epoch": 19.141104294478527,
|
| 109207 |
+
"grad_norm": 0.21787197887897491,
|
| 109208 |
+
"learning_rate": 2.4717876774865425e-07,
|
| 109209 |
+
"loss": 0.46103155612945557,
|
| 109210 |
+
"step": 15600
|
| 109211 |
}
|
| 109212 |
],
|
| 109213 |
"logging_steps": 1,
|
|
|
|
| 109227 |
"attributes": {}
|
| 109228 |
}
|
| 109229 |
},
|
| 109230 |
+
"total_flos": 4.3634092410006405e+19,
|
| 109231 |
"train_batch_size": 8,
|
| 109232 |
"trial_name": null,
|
| 109233 |
"trial_params": null
|