Training in progress, step 15000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 340808816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4fd9fa207bc10039bd14a5700d3d5e9be50df52acc91855dba2115fcc8b7a6b7
|
| 3 |
size 340808816
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 173247691
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6c4cd489dc23a355516909df1f1616dbd0bb5d3a1868ce4be9f454385a277a8
|
| 3 |
size 173247691
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9445552595536daf5bd8731be4eabb308bd26e76a3f4f0c20c4aa55fcf9ea202
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c605898ea36012a62e8676dfac2213cf11505a1ed8b0d577e03e8a5a175cd04
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -102908,6 +102908,2106 @@
|
|
| 102908 |
"learning_rate": 3.4703341974541616e-06,
|
| 102909 |
"loss": 0.4998,
|
| 102910 |
"step": 14700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102911 |
}
|
| 102912 |
],
|
| 102913 |
"logging_steps": 1,
|
|
@@ -102927,7 +105027,7 @@
|
|
| 102927 |
"attributes": {}
|
| 102928 |
}
|
| 102929 |
},
|
| 102930 |
-
"total_flos": 8.
|
| 102931 |
"train_batch_size": 8,
|
| 102932 |
"trial_name": null,
|
| 102933 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 17.084330484330483,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 15000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 102908 |
"learning_rate": 3.4703341974541616e-06,
|
| 102909 |
"loss": 0.4998,
|
| 102910 |
"step": 14700
|
| 102911 |
+
},
|
| 102912 |
+
{
|
| 102913 |
+
"epoch": 16.744159544159544,
|
| 102914 |
+
"grad_norm": 0.2202627956867218,
|
| 102915 |
+
"learning_rate": 3.4679663614047593e-06,
|
| 102916 |
+
"loss": 0.5517,
|
| 102917 |
+
"step": 14701
|
| 102918 |
+
},
|
| 102919 |
+
{
|
| 102920 |
+
"epoch": 16.745299145299146,
|
| 102921 |
+
"grad_norm": 0.2174326777458191,
|
| 102922 |
+
"learning_rate": 3.465599273222089e-06,
|
| 102923 |
+
"loss": 0.7179,
|
| 102924 |
+
"step": 14702
|
| 102925 |
+
},
|
| 102926 |
+
{
|
| 102927 |
+
"epoch": 16.746438746438745,
|
| 102928 |
+
"grad_norm": 0.20844949781894684,
|
| 102929 |
+
"learning_rate": 3.463232932988378e-06,
|
| 102930 |
+
"loss": 0.5682,
|
| 102931 |
+
"step": 14703
|
| 102932 |
+
},
|
| 102933 |
+
{
|
| 102934 |
+
"epoch": 16.747578347578347,
|
| 102935 |
+
"grad_norm": 0.2191018909215927,
|
| 102936 |
+
"learning_rate": 3.4608673407858144e-06,
|
| 102937 |
+
"loss": 0.4616,
|
| 102938 |
+
"step": 14704
|
| 102939 |
+
},
|
| 102940 |
+
{
|
| 102941 |
+
"epoch": 16.74871794871795,
|
| 102942 |
+
"grad_norm": 0.23420454561710358,
|
| 102943 |
+
"learning_rate": 3.458502496696564e-06,
|
| 102944 |
+
"loss": 0.4996,
|
| 102945 |
+
"step": 14705
|
| 102946 |
+
},
|
| 102947 |
+
{
|
| 102948 |
+
"epoch": 16.74985754985755,
|
| 102949 |
+
"grad_norm": 0.24128484725952148,
|
| 102950 |
+
"learning_rate": 3.4561384008027524e-06,
|
| 102951 |
+
"loss": 0.492,
|
| 102952 |
+
"step": 14706
|
| 102953 |
+
},
|
| 102954 |
+
{
|
| 102955 |
+
"epoch": 16.75099715099715,
|
| 102956 |
+
"grad_norm": 0.20390881597995758,
|
| 102957 |
+
"learning_rate": 3.453775053186503e-06,
|
| 102958 |
+
"loss": 0.5434,
|
| 102959 |
+
"step": 14707
|
| 102960 |
+
},
|
| 102961 |
+
{
|
| 102962 |
+
"epoch": 16.752136752136753,
|
| 102963 |
+
"grad_norm": 0.22439153492450714,
|
| 102964 |
+
"learning_rate": 3.4514124539299e-06,
|
| 102965 |
+
"loss": 0.7022,
|
| 102966 |
+
"step": 14708
|
| 102967 |
+
},
|
| 102968 |
+
{
|
| 102969 |
+
"epoch": 16.753276353276352,
|
| 102970 |
+
"grad_norm": 0.19062481820583344,
|
| 102971 |
+
"learning_rate": 3.4490506031150087e-06,
|
| 102972 |
+
"loss": 0.6312,
|
| 102973 |
+
"step": 14709
|
| 102974 |
+
},
|
| 102975 |
+
{
|
| 102976 |
+
"epoch": 16.754415954415954,
|
| 102977 |
+
"grad_norm": 0.17792372405529022,
|
| 102978 |
+
"learning_rate": 3.4466895008238463e-06,
|
| 102979 |
+
"loss": 0.726,
|
| 102980 |
+
"step": 14710
|
| 102981 |
+
},
|
| 102982 |
+
{
|
| 102983 |
+
"epoch": 16.755555555555556,
|
| 102984 |
+
"grad_norm": 0.22493785619735718,
|
| 102985 |
+
"learning_rate": 3.4443291471384308e-06,
|
| 102986 |
+
"loss": 0.6689,
|
| 102987 |
+
"step": 14711
|
| 102988 |
+
},
|
| 102989 |
+
{
|
| 102990 |
+
"epoch": 16.756695156695155,
|
| 102991 |
+
"grad_norm": 0.24571868777275085,
|
| 102992 |
+
"learning_rate": 3.441969542140744e-06,
|
| 102993 |
+
"loss": 0.6001,
|
| 102994 |
+
"step": 14712
|
| 102995 |
+
},
|
| 102996 |
+
{
|
| 102997 |
+
"epoch": 16.757834757834758,
|
| 102998 |
+
"grad_norm": 0.19063417613506317,
|
| 102999 |
+
"learning_rate": 3.4396106859127447e-06,
|
| 103000 |
+
"loss": 0.6952,
|
| 103001 |
+
"step": 14713
|
| 103002 |
+
},
|
| 103003 |
+
{
|
| 103004 |
+
"epoch": 16.75897435897436,
|
| 103005 |
+
"grad_norm": 0.2070053070783615,
|
| 103006 |
+
"learning_rate": 3.4372525785363512e-06,
|
| 103007 |
+
"loss": 0.6566,
|
| 103008 |
+
"step": 14714
|
| 103009 |
+
},
|
| 103010 |
+
{
|
| 103011 |
+
"epoch": 16.76011396011396,
|
| 103012 |
+
"grad_norm": 0.18680186569690704,
|
| 103013 |
+
"learning_rate": 3.434895220093473e-06,
|
| 103014 |
+
"loss": 0.6214,
|
| 103015 |
+
"step": 14715
|
| 103016 |
+
},
|
| 103017 |
+
{
|
| 103018 |
+
"epoch": 16.76125356125356,
|
| 103019 |
+
"grad_norm": 0.22149209678173065,
|
| 103020 |
+
"learning_rate": 3.4325386106659892e-06,
|
| 103021 |
+
"loss": 0.7235,
|
| 103022 |
+
"step": 14716
|
| 103023 |
+
},
|
| 103024 |
+
{
|
| 103025 |
+
"epoch": 16.762393162393163,
|
| 103026 |
+
"grad_norm": 0.2250470668077469,
|
| 103027 |
+
"learning_rate": 3.4301827503357474e-06,
|
| 103028 |
+
"loss": 0.7235,
|
| 103029 |
+
"step": 14717
|
| 103030 |
+
},
|
| 103031 |
+
{
|
| 103032 |
+
"epoch": 16.763532763532762,
|
| 103033 |
+
"grad_norm": 0.1864991933107376,
|
| 103034 |
+
"learning_rate": 3.427827639184578e-06,
|
| 103035 |
+
"loss": 0.5742,
|
| 103036 |
+
"step": 14718
|
| 103037 |
+
},
|
| 103038 |
+
{
|
| 103039 |
+
"epoch": 16.764672364672364,
|
| 103040 |
+
"grad_norm": 0.19813434779644012,
|
| 103041 |
+
"learning_rate": 3.4254732772942804e-06,
|
| 103042 |
+
"loss": 0.661,
|
| 103043 |
+
"step": 14719
|
| 103044 |
+
},
|
| 103045 |
+
{
|
| 103046 |
+
"epoch": 16.765811965811967,
|
| 103047 |
+
"grad_norm": 0.2334143966436386,
|
| 103048 |
+
"learning_rate": 3.4231196647466295e-06,
|
| 103049 |
+
"loss": 0.7577,
|
| 103050 |
+
"step": 14720
|
| 103051 |
+
},
|
| 103052 |
+
{
|
| 103053 |
+
"epoch": 16.766951566951565,
|
| 103054 |
+
"grad_norm": 0.16335418820381165,
|
| 103055 |
+
"learning_rate": 3.4207668016233626e-06,
|
| 103056 |
+
"loss": 0.6523,
|
| 103057 |
+
"step": 14721
|
| 103058 |
+
},
|
| 103059 |
+
{
|
| 103060 |
+
"epoch": 16.768091168091168,
|
| 103061 |
+
"grad_norm": 0.1976618766784668,
|
| 103062 |
+
"learning_rate": 3.4184146880062105e-06,
|
| 103063 |
+
"loss": 0.7471,
|
| 103064 |
+
"step": 14722
|
| 103065 |
+
},
|
| 103066 |
+
{
|
| 103067 |
+
"epoch": 16.76923076923077,
|
| 103068 |
+
"grad_norm": 0.16605301201343536,
|
| 103069 |
+
"learning_rate": 3.4160633239768618e-06,
|
| 103070 |
+
"loss": 0.6936,
|
| 103071 |
+
"step": 14723
|
| 103072 |
+
},
|
| 103073 |
+
{
|
| 103074 |
+
"epoch": 16.77037037037037,
|
| 103075 |
+
"grad_norm": 0.2468760907649994,
|
| 103076 |
+
"learning_rate": 3.4137127096170007e-06,
|
| 103077 |
+
"loss": 0.4153,
|
| 103078 |
+
"step": 14724
|
| 103079 |
+
},
|
| 103080 |
+
{
|
| 103081 |
+
"epoch": 16.77150997150997,
|
| 103082 |
+
"grad_norm": 0.22643277049064636,
|
| 103083 |
+
"learning_rate": 3.411362845008251e-06,
|
| 103084 |
+
"loss": 0.7011,
|
| 103085 |
+
"step": 14725
|
| 103086 |
+
},
|
| 103087 |
+
{
|
| 103088 |
+
"epoch": 16.772649572649573,
|
| 103089 |
+
"grad_norm": 0.20217841863632202,
|
| 103090 |
+
"learning_rate": 3.409013730232238e-06,
|
| 103091 |
+
"loss": 0.4763,
|
| 103092 |
+
"step": 14726
|
| 103093 |
+
},
|
| 103094 |
+
{
|
| 103095 |
+
"epoch": 16.773789173789172,
|
| 103096 |
+
"grad_norm": 0.17784053087234497,
|
| 103097 |
+
"learning_rate": 3.4066653653705566e-06,
|
| 103098 |
+
"loss": 0.845,
|
| 103099 |
+
"step": 14727
|
| 103100 |
+
},
|
| 103101 |
+
{
|
| 103102 |
+
"epoch": 16.774928774928775,
|
| 103103 |
+
"grad_norm": 0.238676518201828,
|
| 103104 |
+
"learning_rate": 3.404317750504765e-06,
|
| 103105 |
+
"loss": 0.541,
|
| 103106 |
+
"step": 14728
|
| 103107 |
+
},
|
| 103108 |
+
{
|
| 103109 |
+
"epoch": 16.776068376068377,
|
| 103110 |
+
"grad_norm": 0.23390735685825348,
|
| 103111 |
+
"learning_rate": 3.4019708857164102e-06,
|
| 103112 |
+
"loss": 0.7431,
|
| 103113 |
+
"step": 14729
|
| 103114 |
+
},
|
| 103115 |
+
{
|
| 103116 |
+
"epoch": 16.777207977207976,
|
| 103117 |
+
"grad_norm": 0.25913941860198975,
|
| 103118 |
+
"learning_rate": 3.399624771086998e-06,
|
| 103119 |
+
"loss": 0.6154,
|
| 103120 |
+
"step": 14730
|
| 103121 |
+
},
|
| 103122 |
+
{
|
| 103123 |
+
"epoch": 16.778347578347578,
|
| 103124 |
+
"grad_norm": 0.17202185094356537,
|
| 103125 |
+
"learning_rate": 3.3972794066980256e-06,
|
| 103126 |
+
"loss": 0.9398,
|
| 103127 |
+
"step": 14731
|
| 103128 |
+
},
|
| 103129 |
+
{
|
| 103130 |
+
"epoch": 16.77948717948718,
|
| 103131 |
+
"grad_norm": 0.16503441333770752,
|
| 103132 |
+
"learning_rate": 3.3949347926309437e-06,
|
| 103133 |
+
"loss": 0.4665,
|
| 103134 |
+
"step": 14732
|
| 103135 |
+
},
|
| 103136 |
+
{
|
| 103137 |
+
"epoch": 16.78062678062678,
|
| 103138 |
+
"grad_norm": 0.21525779366493225,
|
| 103139 |
+
"learning_rate": 3.3925909289671876e-06,
|
| 103140 |
+
"loss": 0.6305,
|
| 103141 |
+
"step": 14733
|
| 103142 |
+
},
|
| 103143 |
+
{
|
| 103144 |
+
"epoch": 16.78176638176638,
|
| 103145 |
+
"grad_norm": 0.21760420501232147,
|
| 103146 |
+
"learning_rate": 3.3902478157881723e-06,
|
| 103147 |
+
"loss": 0.6829,
|
| 103148 |
+
"step": 14734
|
| 103149 |
+
},
|
| 103150 |
+
{
|
| 103151 |
+
"epoch": 16.782905982905984,
|
| 103152 |
+
"grad_norm": 0.27629998326301575,
|
| 103153 |
+
"learning_rate": 3.3879054531752803e-06,
|
| 103154 |
+
"loss": 0.5113,
|
| 103155 |
+
"step": 14735
|
| 103156 |
+
},
|
| 103157 |
+
{
|
| 103158 |
+
"epoch": 16.784045584045582,
|
| 103159 |
+
"grad_norm": 0.32538872957229614,
|
| 103160 |
+
"learning_rate": 3.38556384120986e-06,
|
| 103161 |
+
"loss": 0.6818,
|
| 103162 |
+
"step": 14736
|
| 103163 |
+
},
|
| 103164 |
+
{
|
| 103165 |
+
"epoch": 16.785185185185185,
|
| 103166 |
+
"grad_norm": 0.1555013358592987,
|
| 103167 |
+
"learning_rate": 3.383222979973247e-06,
|
| 103168 |
+
"loss": 0.6033,
|
| 103169 |
+
"step": 14737
|
| 103170 |
+
},
|
| 103171 |
+
{
|
| 103172 |
+
"epoch": 16.786324786324787,
|
| 103173 |
+
"grad_norm": 0.20977364480495453,
|
| 103174 |
+
"learning_rate": 3.3808828695467476e-06,
|
| 103175 |
+
"loss": 0.5296,
|
| 103176 |
+
"step": 14738
|
| 103177 |
+
},
|
| 103178 |
+
{
|
| 103179 |
+
"epoch": 16.787464387464386,
|
| 103180 |
+
"grad_norm": 0.17263485491275787,
|
| 103181 |
+
"learning_rate": 3.378543510011639e-06,
|
| 103182 |
+
"loss": 0.6992,
|
| 103183 |
+
"step": 14739
|
| 103184 |
+
},
|
| 103185 |
+
{
|
| 103186 |
+
"epoch": 16.788603988603988,
|
| 103187 |
+
"grad_norm": 0.16598084568977356,
|
| 103188 |
+
"learning_rate": 3.376204901449173e-06,
|
| 103189 |
+
"loss": 0.7407,
|
| 103190 |
+
"step": 14740
|
| 103191 |
+
},
|
| 103192 |
+
{
|
| 103193 |
+
"epoch": 16.78974358974359,
|
| 103194 |
+
"grad_norm": 0.17881295084953308,
|
| 103195 |
+
"learning_rate": 3.3738670439405763e-06,
|
| 103196 |
+
"loss": 0.8125,
|
| 103197 |
+
"step": 14741
|
| 103198 |
+
},
|
| 103199 |
+
{
|
| 103200 |
+
"epoch": 16.79088319088319,
|
| 103201 |
+
"grad_norm": 0.16692639887332916,
|
| 103202 |
+
"learning_rate": 3.3715299375670554e-06,
|
| 103203 |
+
"loss": 0.8586,
|
| 103204 |
+
"step": 14742
|
| 103205 |
+
},
|
| 103206 |
+
{
|
| 103207 |
+
"epoch": 16.79202279202279,
|
| 103208 |
+
"grad_norm": 0.22679796814918518,
|
| 103209 |
+
"learning_rate": 3.3691935824097713e-06,
|
| 103210 |
+
"loss": 0.6736,
|
| 103211 |
+
"step": 14743
|
| 103212 |
+
},
|
| 103213 |
+
{
|
| 103214 |
+
"epoch": 16.793162393162394,
|
| 103215 |
+
"grad_norm": 0.2388918399810791,
|
| 103216 |
+
"learning_rate": 3.3668579785498812e-06,
|
| 103217 |
+
"loss": 0.5589,
|
| 103218 |
+
"step": 14744
|
| 103219 |
+
},
|
| 103220 |
+
{
|
| 103221 |
+
"epoch": 16.794301994301993,
|
| 103222 |
+
"grad_norm": 0.18057364225387573,
|
| 103223 |
+
"learning_rate": 3.3645231260685054e-06,
|
| 103224 |
+
"loss": 0.8457,
|
| 103225 |
+
"step": 14745
|
| 103226 |
+
},
|
| 103227 |
+
{
|
| 103228 |
+
"epoch": 16.795441595441595,
|
| 103229 |
+
"grad_norm": 0.19479382038116455,
|
| 103230 |
+
"learning_rate": 3.362189025046736e-06,
|
| 103231 |
+
"loss": 0.8523,
|
| 103232 |
+
"step": 14746
|
| 103233 |
+
},
|
| 103234 |
+
{
|
| 103235 |
+
"epoch": 16.796581196581197,
|
| 103236 |
+
"grad_norm": 0.18581032752990723,
|
| 103237 |
+
"learning_rate": 3.359855675565654e-06,
|
| 103238 |
+
"loss": 0.8232,
|
| 103239 |
+
"step": 14747
|
| 103240 |
+
},
|
| 103241 |
+
{
|
| 103242 |
+
"epoch": 16.797720797720796,
|
| 103243 |
+
"grad_norm": 0.19107264280319214,
|
| 103244 |
+
"learning_rate": 3.357523077706287e-06,
|
| 103245 |
+
"loss": 0.5805,
|
| 103246 |
+
"step": 14748
|
| 103247 |
+
},
|
| 103248 |
+
{
|
| 103249 |
+
"epoch": 16.7988603988604,
|
| 103250 |
+
"grad_norm": 0.21707648038864136,
|
| 103251 |
+
"learning_rate": 3.355191231549665e-06,
|
| 103252 |
+
"loss": 0.4581,
|
| 103253 |
+
"step": 14749
|
| 103254 |
+
},
|
| 103255 |
+
{
|
| 103256 |
+
"epoch": 16.8,
|
| 103257 |
+
"grad_norm": 0.17213472723960876,
|
| 103258 |
+
"learning_rate": 3.3528601371767722e-06,
|
| 103259 |
+
"loss": 0.6278,
|
| 103260 |
+
"step": 14750
|
| 103261 |
+
},
|
| 103262 |
+
{
|
| 103263 |
+
"epoch": 16.8011396011396,
|
| 103264 |
+
"grad_norm": 0.18237662315368652,
|
| 103265 |
+
"learning_rate": 3.3505297946685748e-06,
|
| 103266 |
+
"loss": 0.7756,
|
| 103267 |
+
"step": 14751
|
| 103268 |
+
},
|
| 103269 |
+
{
|
| 103270 |
+
"epoch": 16.802279202279202,
|
| 103271 |
+
"grad_norm": 0.2457255870103836,
|
| 103272 |
+
"learning_rate": 3.3482002041060166e-06,
|
| 103273 |
+
"loss": 0.6011,
|
| 103274 |
+
"step": 14752
|
| 103275 |
+
},
|
| 103276 |
+
{
|
| 103277 |
+
"epoch": 16.803418803418804,
|
| 103278 |
+
"grad_norm": 0.21424660086631775,
|
| 103279 |
+
"learning_rate": 3.345871365570008e-06,
|
| 103280 |
+
"loss": 0.6095,
|
| 103281 |
+
"step": 14753
|
| 103282 |
+
},
|
| 103283 |
+
{
|
| 103284 |
+
"epoch": 16.804558404558403,
|
| 103285 |
+
"grad_norm": 0.209234818816185,
|
| 103286 |
+
"learning_rate": 3.3435432791414395e-06,
|
| 103287 |
+
"loss": 0.6892,
|
| 103288 |
+
"step": 14754
|
| 103289 |
+
},
|
| 103290 |
+
{
|
| 103291 |
+
"epoch": 16.805698005698005,
|
| 103292 |
+
"grad_norm": 0.23546189069747925,
|
| 103293 |
+
"learning_rate": 3.341215944901163e-06,
|
| 103294 |
+
"loss": 0.6176,
|
| 103295 |
+
"step": 14755
|
| 103296 |
+
},
|
| 103297 |
+
{
|
| 103298 |
+
"epoch": 16.806837606837608,
|
| 103299 |
+
"grad_norm": 0.18982715904712677,
|
| 103300 |
+
"learning_rate": 3.33888936293002e-06,
|
| 103301 |
+
"loss": 0.8664,
|
| 103302 |
+
"step": 14756
|
| 103303 |
+
},
|
| 103304 |
+
{
|
| 103305 |
+
"epoch": 16.807977207977206,
|
| 103306 |
+
"grad_norm": 0.16256925463676453,
|
| 103307 |
+
"learning_rate": 3.3365635333088173e-06,
|
| 103308 |
+
"loss": 0.6438,
|
| 103309 |
+
"step": 14757
|
| 103310 |
+
},
|
| 103311 |
+
{
|
| 103312 |
+
"epoch": 16.80911680911681,
|
| 103313 |
+
"grad_norm": 0.2089722603559494,
|
| 103314 |
+
"learning_rate": 3.3342384561183436e-06,
|
| 103315 |
+
"loss": 0.6521,
|
| 103316 |
+
"step": 14758
|
| 103317 |
+
},
|
| 103318 |
+
{
|
| 103319 |
+
"epoch": 16.81025641025641,
|
| 103320 |
+
"grad_norm": 0.23766598105430603,
|
| 103321 |
+
"learning_rate": 3.331914131439343e-06,
|
| 103322 |
+
"loss": 0.2682,
|
| 103323 |
+
"step": 14759
|
| 103324 |
+
},
|
| 103325 |
+
{
|
| 103326 |
+
"epoch": 16.81139601139601,
|
| 103327 |
+
"grad_norm": 0.24712476134300232,
|
| 103328 |
+
"learning_rate": 3.329590559352555e-06,
|
| 103329 |
+
"loss": 0.6654,
|
| 103330 |
+
"step": 14760
|
| 103331 |
+
},
|
| 103332 |
+
{
|
| 103333 |
+
"epoch": 16.812535612535612,
|
| 103334 |
+
"grad_norm": 0.1959812343120575,
|
| 103335 |
+
"learning_rate": 3.3272677399386775e-06,
|
| 103336 |
+
"loss": 0.5572,
|
| 103337 |
+
"step": 14761
|
| 103338 |
+
},
|
| 103339 |
+
{
|
| 103340 |
+
"epoch": 16.813675213675214,
|
| 103341 |
+
"grad_norm": 0.19847257435321808,
|
| 103342 |
+
"learning_rate": 3.3249456732783927e-06,
|
| 103343 |
+
"loss": 0.5969,
|
| 103344 |
+
"step": 14762
|
| 103345 |
+
},
|
| 103346 |
+
{
|
| 103347 |
+
"epoch": 16.814814814814813,
|
| 103348 |
+
"grad_norm": 0.21061527729034424,
|
| 103349 |
+
"learning_rate": 3.3226243594523504e-06,
|
| 103350 |
+
"loss": 0.7511,
|
| 103351 |
+
"step": 14763
|
| 103352 |
+
},
|
| 103353 |
+
{
|
| 103354 |
+
"epoch": 16.815954415954415,
|
| 103355 |
+
"grad_norm": 0.22279202938079834,
|
| 103356 |
+
"learning_rate": 3.320303798541177e-06,
|
| 103357 |
+
"loss": 0.639,
|
| 103358 |
+
"step": 14764
|
| 103359 |
+
},
|
| 103360 |
+
{
|
| 103361 |
+
"epoch": 16.817094017094018,
|
| 103362 |
+
"grad_norm": 0.20850226283073425,
|
| 103363 |
+
"learning_rate": 3.3179839906254783e-06,
|
| 103364 |
+
"loss": 0.7963,
|
| 103365 |
+
"step": 14765
|
| 103366 |
+
},
|
| 103367 |
+
{
|
| 103368 |
+
"epoch": 16.81823361823362,
|
| 103369 |
+
"grad_norm": 0.2269270420074463,
|
| 103370 |
+
"learning_rate": 3.3156649357858173e-06,
|
| 103371 |
+
"loss": 0.7152,
|
| 103372 |
+
"step": 14766
|
| 103373 |
+
},
|
| 103374 |
+
{
|
| 103375 |
+
"epoch": 16.81937321937322,
|
| 103376 |
+
"grad_norm": 0.16206906735897064,
|
| 103377 |
+
"learning_rate": 3.313346634102743e-06,
|
| 103378 |
+
"loss": 0.7077,
|
| 103379 |
+
"step": 14767
|
| 103380 |
+
},
|
| 103381 |
+
{
|
| 103382 |
+
"epoch": 16.82051282051282,
|
| 103383 |
+
"grad_norm": 0.19101674854755402,
|
| 103384 |
+
"learning_rate": 3.3110290856567805e-06,
|
| 103385 |
+
"loss": 0.7085,
|
| 103386 |
+
"step": 14768
|
| 103387 |
+
},
|
| 103388 |
+
{
|
| 103389 |
+
"epoch": 16.821652421652423,
|
| 103390 |
+
"grad_norm": 0.24302618205547333,
|
| 103391 |
+
"learning_rate": 3.3087122905284263e-06,
|
| 103392 |
+
"loss": 0.7949,
|
| 103393 |
+
"step": 14769
|
| 103394 |
+
},
|
| 103395 |
+
{
|
| 103396 |
+
"epoch": 16.822792022792022,
|
| 103397 |
+
"grad_norm": 0.160295769572258,
|
| 103398 |
+
"learning_rate": 3.306396248798141e-06,
|
| 103399 |
+
"loss": 0.7553,
|
| 103400 |
+
"step": 14770
|
| 103401 |
+
},
|
| 103402 |
+
{
|
| 103403 |
+
"epoch": 16.823931623931625,
|
| 103404 |
+
"grad_norm": 0.20017482340335846,
|
| 103405 |
+
"learning_rate": 3.304080960546374e-06,
|
| 103406 |
+
"loss": 0.7562,
|
| 103407 |
+
"step": 14771
|
| 103408 |
+
},
|
| 103409 |
+
{
|
| 103410 |
+
"epoch": 16.825071225071227,
|
| 103411 |
+
"grad_norm": 0.21850743889808655,
|
| 103412 |
+
"learning_rate": 3.3017664258535364e-06,
|
| 103413 |
+
"loss": 0.6732,
|
| 103414 |
+
"step": 14772
|
| 103415 |
+
},
|
| 103416 |
+
{
|
| 103417 |
+
"epoch": 16.826210826210826,
|
| 103418 |
+
"grad_norm": 0.19815374910831451,
|
| 103419 |
+
"learning_rate": 3.299452644800022e-06,
|
| 103420 |
+
"loss": 0.692,
|
| 103421 |
+
"step": 14773
|
| 103422 |
+
},
|
| 103423 |
+
{
|
| 103424 |
+
"epoch": 16.827350427350428,
|
| 103425 |
+
"grad_norm": 0.1972140371799469,
|
| 103426 |
+
"learning_rate": 3.297139617466194e-06,
|
| 103427 |
+
"loss": 0.4356,
|
| 103428 |
+
"step": 14774
|
| 103429 |
+
},
|
| 103430 |
+
{
|
| 103431 |
+
"epoch": 16.82849002849003,
|
| 103432 |
+
"grad_norm": 0.1911310851573944,
|
| 103433 |
+
"learning_rate": 3.2948273439323884e-06,
|
| 103434 |
+
"loss": 0.6865,
|
| 103435 |
+
"step": 14775
|
| 103436 |
+
},
|
| 103437 |
+
{
|
| 103438 |
+
"epoch": 16.82962962962963,
|
| 103439 |
+
"grad_norm": 0.22164954245090485,
|
| 103440 |
+
"learning_rate": 3.292515824278927e-06,
|
| 103441 |
+
"loss": 0.4935,
|
| 103442 |
+
"step": 14776
|
| 103443 |
+
},
|
| 103444 |
+
{
|
| 103445 |
+
"epoch": 16.83076923076923,
|
| 103446 |
+
"grad_norm": 0.20343266427516937,
|
| 103447 |
+
"learning_rate": 3.2902050585860792e-06,
|
| 103448 |
+
"loss": 0.5128,
|
| 103449 |
+
"step": 14777
|
| 103450 |
+
},
|
| 103451 |
+
{
|
| 103452 |
+
"epoch": 16.831908831908834,
|
| 103453 |
+
"grad_norm": 0.18866266310214996,
|
| 103454 |
+
"learning_rate": 3.2878950469341112e-06,
|
| 103455 |
+
"loss": 0.9589,
|
| 103456 |
+
"step": 14778
|
| 103457 |
+
},
|
| 103458 |
+
{
|
| 103459 |
+
"epoch": 16.833048433048432,
|
| 103460 |
+
"grad_norm": 0.2625249922275543,
|
| 103461 |
+
"learning_rate": 3.2855857894032588e-06,
|
| 103462 |
+
"loss": 0.5282,
|
| 103463 |
+
"step": 14779
|
| 103464 |
+
},
|
| 103465 |
+
{
|
| 103466 |
+
"epoch": 16.834188034188035,
|
| 103467 |
+
"grad_norm": 0.16464684903621674,
|
| 103468 |
+
"learning_rate": 3.2832772860737325e-06,
|
| 103469 |
+
"loss": 0.5974,
|
| 103470 |
+
"step": 14780
|
| 103471 |
+
},
|
| 103472 |
+
{
|
| 103473 |
+
"epoch": 16.835327635327637,
|
| 103474 |
+
"grad_norm": 0.25545403361320496,
|
| 103475 |
+
"learning_rate": 3.2809695370256993e-06,
|
| 103476 |
+
"loss": 0.5496,
|
| 103477 |
+
"step": 14781
|
| 103478 |
+
},
|
| 103479 |
+
{
|
| 103480 |
+
"epoch": 16.836467236467236,
|
| 103481 |
+
"grad_norm": 0.2255530059337616,
|
| 103482 |
+
"learning_rate": 3.278662542339325e-06,
|
| 103483 |
+
"loss": 0.3252,
|
| 103484 |
+
"step": 14782
|
| 103485 |
+
},
|
| 103486 |
+
{
|
| 103487 |
+
"epoch": 16.837606837606838,
|
| 103488 |
+
"grad_norm": 0.18065612018108368,
|
| 103489 |
+
"learning_rate": 3.276356302094727e-06,
|
| 103490 |
+
"loss": 0.7256,
|
| 103491 |
+
"step": 14783
|
| 103492 |
+
},
|
| 103493 |
+
{
|
| 103494 |
+
"epoch": 16.83874643874644,
|
| 103495 |
+
"grad_norm": 0.18317294120788574,
|
| 103496 |
+
"learning_rate": 3.274050816372026e-06,
|
| 103497 |
+
"loss": 0.6904,
|
| 103498 |
+
"step": 14784
|
| 103499 |
+
},
|
| 103500 |
+
{
|
| 103501 |
+
"epoch": 16.83988603988604,
|
| 103502 |
+
"grad_norm": 0.1812399923801422,
|
| 103503 |
+
"learning_rate": 3.2717460852512814e-06,
|
| 103504 |
+
"loss": 0.7243,
|
| 103505 |
+
"step": 14785
|
| 103506 |
+
},
|
| 103507 |
+
{
|
| 103508 |
+
"epoch": 16.84102564102564,
|
| 103509 |
+
"grad_norm": 0.19768092036247253,
|
| 103510 |
+
"learning_rate": 3.2694421088125484e-06,
|
| 103511 |
+
"loss": 0.7388,
|
| 103512 |
+
"step": 14786
|
| 103513 |
+
},
|
| 103514 |
+
{
|
| 103515 |
+
"epoch": 16.842165242165244,
|
| 103516 |
+
"grad_norm": 0.22675330936908722,
|
| 103517 |
+
"learning_rate": 3.267138887135851e-06,
|
| 103518 |
+
"loss": 0.6823,
|
| 103519 |
+
"step": 14787
|
| 103520 |
+
},
|
| 103521 |
+
{
|
| 103522 |
+
"epoch": 16.843304843304843,
|
| 103523 |
+
"grad_norm": 0.2749398350715637,
|
| 103524 |
+
"learning_rate": 3.26483642030119e-06,
|
| 103525 |
+
"loss": 0.5847,
|
| 103526 |
+
"step": 14788
|
| 103527 |
+
},
|
| 103528 |
+
{
|
| 103529 |
+
"epoch": 16.844444444444445,
|
| 103530 |
+
"grad_norm": 0.23310603201389313,
|
| 103531 |
+
"learning_rate": 3.262534708388526e-06,
|
| 103532 |
+
"loss": 0.5695,
|
| 103533 |
+
"step": 14789
|
| 103534 |
+
},
|
| 103535 |
+
{
|
| 103536 |
+
"epoch": 16.845584045584047,
|
| 103537 |
+
"grad_norm": 0.16094189882278442,
|
| 103538 |
+
"learning_rate": 3.260233751477809e-06,
|
| 103539 |
+
"loss": 0.7909,
|
| 103540 |
+
"step": 14790
|
| 103541 |
+
},
|
| 103542 |
+
{
|
| 103543 |
+
"epoch": 16.846723646723646,
|
| 103544 |
+
"grad_norm": 0.1772032082080841,
|
| 103545 |
+
"learning_rate": 3.2579335496489617e-06,
|
| 103546 |
+
"loss": 0.9013,
|
| 103547 |
+
"step": 14791
|
| 103548 |
+
},
|
| 103549 |
+
{
|
| 103550 |
+
"epoch": 16.84786324786325,
|
| 103551 |
+
"grad_norm": 0.23654736578464508,
|
| 103552 |
+
"learning_rate": 3.2556341029818747e-06,
|
| 103553 |
+
"loss": 0.4448,
|
| 103554 |
+
"step": 14792
|
| 103555 |
+
},
|
| 103556 |
+
{
|
| 103557 |
+
"epoch": 16.84900284900285,
|
| 103558 |
+
"grad_norm": 0.21854573488235474,
|
| 103559 |
+
"learning_rate": 3.25333541155641e-06,
|
| 103560 |
+
"loss": 0.6025,
|
| 103561 |
+
"step": 14793
|
| 103562 |
+
},
|
| 103563 |
+
{
|
| 103564 |
+
"epoch": 16.85014245014245,
|
| 103565 |
+
"grad_norm": 0.1650972068309784,
|
| 103566 |
+
"learning_rate": 3.2510374754524037e-06,
|
| 103567 |
+
"loss": 0.8344,
|
| 103568 |
+
"step": 14794
|
| 103569 |
+
},
|
| 103570 |
+
{
|
| 103571 |
+
"epoch": 16.851282051282052,
|
| 103572 |
+
"grad_norm": 0.21306946873664856,
|
| 103573 |
+
"learning_rate": 3.2487402947496887e-06,
|
| 103574 |
+
"loss": 0.5082,
|
| 103575 |
+
"step": 14795
|
| 103576 |
+
},
|
| 103577 |
+
{
|
| 103578 |
+
"epoch": 16.852421652421654,
|
| 103579 |
+
"grad_norm": 0.187848761677742,
|
| 103580 |
+
"learning_rate": 3.246443869528032e-06,
|
| 103581 |
+
"loss": 0.712,
|
| 103582 |
+
"step": 14796
|
| 103583 |
+
},
|
| 103584 |
+
{
|
| 103585 |
+
"epoch": 16.853561253561253,
|
| 103586 |
+
"grad_norm": 0.22535042464733124,
|
| 103587 |
+
"learning_rate": 3.2441481998672004e-06,
|
| 103588 |
+
"loss": 0.4455,
|
| 103589 |
+
"step": 14797
|
| 103590 |
+
},
|
| 103591 |
+
{
|
| 103592 |
+
"epoch": 16.854700854700855,
|
| 103593 |
+
"grad_norm": 0.24472816288471222,
|
| 103594 |
+
"learning_rate": 3.2418532858469327e-06,
|
| 103595 |
+
"loss": 0.6224,
|
| 103596 |
+
"step": 14798
|
| 103597 |
+
},
|
| 103598 |
+
{
|
| 103599 |
+
"epoch": 16.855840455840458,
|
| 103600 |
+
"grad_norm": 0.2107369750738144,
|
| 103601 |
+
"learning_rate": 3.2395591275469427e-06,
|
| 103602 |
+
"loss": 0.5785,
|
| 103603 |
+
"step": 14799
|
| 103604 |
+
},
|
| 103605 |
+
{
|
| 103606 |
+
"epoch": 16.856980056980056,
|
| 103607 |
+
"grad_norm": 0.2104388326406479,
|
| 103608 |
+
"learning_rate": 3.237265725046898e-06,
|
| 103609 |
+
"loss": 0.4863,
|
| 103610 |
+
"step": 14800
|
| 103611 |
+
},
|
| 103612 |
+
{
|
| 103613 |
+
"epoch": 16.85811965811966,
|
| 103614 |
+
"grad_norm": 0.20485562086105347,
|
| 103615 |
+
"learning_rate": 3.234973078426462e-06,
|
| 103616 |
+
"loss": 0.799,
|
| 103617 |
+
"step": 14801
|
| 103618 |
+
},
|
| 103619 |
+
{
|
| 103620 |
+
"epoch": 16.85925925925926,
|
| 103621 |
+
"grad_norm": 0.2231946438550949,
|
| 103622 |
+
"learning_rate": 3.232681187765266e-06,
|
| 103623 |
+
"loss": 0.7746,
|
| 103624 |
+
"step": 14802
|
| 103625 |
+
},
|
| 103626 |
+
{
|
| 103627 |
+
"epoch": 16.86039886039886,
|
| 103628 |
+
"grad_norm": 0.1903351992368698,
|
| 103629 |
+
"learning_rate": 3.2303900531429176e-06,
|
| 103630 |
+
"loss": 0.7163,
|
| 103631 |
+
"step": 14803
|
| 103632 |
+
},
|
| 103633 |
+
{
|
| 103634 |
+
"epoch": 16.861538461538462,
|
| 103635 |
+
"grad_norm": 0.19041438400745392,
|
| 103636 |
+
"learning_rate": 3.2280996746389847e-06,
|
| 103637 |
+
"loss": 0.699,
|
| 103638 |
+
"step": 14804
|
| 103639 |
+
},
|
| 103640 |
+
{
|
| 103641 |
+
"epoch": 16.862678062678064,
|
| 103642 |
+
"grad_norm": 0.16172075271606445,
|
| 103643 |
+
"learning_rate": 3.225810052333017e-06,
|
| 103644 |
+
"loss": 0.7741,
|
| 103645 |
+
"step": 14805
|
| 103646 |
+
},
|
| 103647 |
+
{
|
| 103648 |
+
"epoch": 16.863817663817663,
|
| 103649 |
+
"grad_norm": 0.30482029914855957,
|
| 103650 |
+
"learning_rate": 3.2235211863045567e-06,
|
| 103651 |
+
"loss": 0.6755,
|
| 103652 |
+
"step": 14806
|
| 103653 |
+
},
|
| 103654 |
+
{
|
| 103655 |
+
"epoch": 16.864957264957265,
|
| 103656 |
+
"grad_norm": 0.2442866861820221,
|
| 103657 |
+
"learning_rate": 3.2212330766330846e-06,
|
| 103658 |
+
"loss": 0.6495,
|
| 103659 |
+
"step": 14807
|
| 103660 |
+
},
|
| 103661 |
+
{
|
| 103662 |
+
"epoch": 16.866096866096868,
|
| 103663 |
+
"grad_norm": 0.18580791354179382,
|
| 103664 |
+
"learning_rate": 3.2189457233980815e-06,
|
| 103665 |
+
"loss": 0.5523,
|
| 103666 |
+
"step": 14808
|
| 103667 |
+
},
|
| 103668 |
+
{
|
| 103669 |
+
"epoch": 16.867236467236467,
|
| 103670 |
+
"grad_norm": 0.17136384546756744,
|
| 103671 |
+
"learning_rate": 3.216659126678989e-06,
|
| 103672 |
+
"loss": 0.6446,
|
| 103673 |
+
"step": 14809
|
| 103674 |
+
},
|
| 103675 |
+
{
|
| 103676 |
+
"epoch": 16.86837606837607,
|
| 103677 |
+
"grad_norm": 0.1991439014673233,
|
| 103678 |
+
"learning_rate": 3.214373286555236e-06,
|
| 103679 |
+
"loss": 0.673,
|
| 103680 |
+
"step": 14810
|
| 103681 |
+
},
|
| 103682 |
+
{
|
| 103683 |
+
"epoch": 16.86951566951567,
|
| 103684 |
+
"grad_norm": 0.2081996649503708,
|
| 103685 |
+
"learning_rate": 3.212088203106206e-06,
|
| 103686 |
+
"loss": 0.4252,
|
| 103687 |
+
"step": 14811
|
| 103688 |
+
},
|
| 103689 |
+
{
|
| 103690 |
+
"epoch": 16.87065527065527,
|
| 103691 |
+
"grad_norm": 0.1936638057231903,
|
| 103692 |
+
"learning_rate": 3.209803876411266e-06,
|
| 103693 |
+
"loss": 0.5941,
|
| 103694 |
+
"step": 14812
|
| 103695 |
+
},
|
| 103696 |
+
{
|
| 103697 |
+
"epoch": 16.871794871794872,
|
| 103698 |
+
"grad_norm": 0.19413010776042938,
|
| 103699 |
+
"learning_rate": 3.2075203065497633e-06,
|
| 103700 |
+
"loss": 0.5806,
|
| 103701 |
+
"step": 14813
|
| 103702 |
+
},
|
| 103703 |
+
{
|
| 103704 |
+
"epoch": 16.872934472934475,
|
| 103705 |
+
"grad_norm": 0.19577611982822418,
|
| 103706 |
+
"learning_rate": 3.2052374936010154e-06,
|
| 103707 |
+
"loss": 0.5351,
|
| 103708 |
+
"step": 14814
|
| 103709 |
+
},
|
| 103710 |
+
{
|
| 103711 |
+
"epoch": 16.874074074074073,
|
| 103712 |
+
"grad_norm": 0.21065311133861542,
|
| 103713 |
+
"learning_rate": 3.202955437644298e-06,
|
| 103714 |
+
"loss": 0.4754,
|
| 103715 |
+
"step": 14815
|
| 103716 |
+
},
|
| 103717 |
+
{
|
| 103718 |
+
"epoch": 16.875213675213676,
|
| 103719 |
+
"grad_norm": 0.21284545958042145,
|
| 103720 |
+
"learning_rate": 3.2006741387588773e-06,
|
| 103721 |
+
"loss": 0.5826,
|
| 103722 |
+
"step": 14816
|
| 103723 |
+
},
|
| 103724 |
+
{
|
| 103725 |
+
"epoch": 16.876353276353278,
|
| 103726 |
+
"grad_norm": 0.223323255777359,
|
| 103727 |
+
"learning_rate": 3.198393597024002e-06,
|
| 103728 |
+
"loss": 0.5474,
|
| 103729 |
+
"step": 14817
|
| 103730 |
+
},
|
| 103731 |
+
{
|
| 103732 |
+
"epoch": 16.877492877492877,
|
| 103733 |
+
"grad_norm": 0.17122265696525574,
|
| 103734 |
+
"learning_rate": 3.1961138125188665e-06,
|
| 103735 |
+
"loss": 0.6242,
|
| 103736 |
+
"step": 14818
|
| 103737 |
+
},
|
| 103738 |
+
{
|
| 103739 |
+
"epoch": 16.87863247863248,
|
| 103740 |
+
"grad_norm": 0.15588898956775665,
|
| 103741 |
+
"learning_rate": 3.193834785322658e-06,
|
| 103742 |
+
"loss": 0.8631,
|
| 103743 |
+
"step": 14819
|
| 103744 |
+
},
|
| 103745 |
+
{
|
| 103746 |
+
"epoch": 16.87977207977208,
|
| 103747 |
+
"grad_norm": 0.20647187530994415,
|
| 103748 |
+
"learning_rate": 3.191556515514535e-06,
|
| 103749 |
+
"loss": 0.655,
|
| 103750 |
+
"step": 14820
|
| 103751 |
+
},
|
| 103752 |
+
{
|
| 103753 |
+
"epoch": 16.88091168091168,
|
| 103754 |
+
"grad_norm": 0.21994750201702118,
|
| 103755 |
+
"learning_rate": 3.189279003173634e-06,
|
| 103756 |
+
"loss": 0.8065,
|
| 103757 |
+
"step": 14821
|
| 103758 |
+
},
|
| 103759 |
+
{
|
| 103760 |
+
"epoch": 16.882051282051282,
|
| 103761 |
+
"grad_norm": 0.16426804661750793,
|
| 103762 |
+
"learning_rate": 3.187002248379045e-06,
|
| 103763 |
+
"loss": 0.7207,
|
| 103764 |
+
"step": 14822
|
| 103765 |
+
},
|
| 103766 |
+
{
|
| 103767 |
+
"epoch": 16.883190883190885,
|
| 103768 |
+
"grad_norm": 0.28191372752189636,
|
| 103769 |
+
"learning_rate": 3.1847262512098573e-06,
|
| 103770 |
+
"loss": 0.6303,
|
| 103771 |
+
"step": 14823
|
| 103772 |
+
},
|
| 103773 |
+
{
|
| 103774 |
+
"epoch": 16.884330484330484,
|
| 103775 |
+
"grad_norm": 0.18324729800224304,
|
| 103776 |
+
"learning_rate": 3.182451011745116e-06,
|
| 103777 |
+
"loss": 0.5286,
|
| 103778 |
+
"step": 14824
|
| 103779 |
+
},
|
| 103780 |
+
{
|
| 103781 |
+
"epoch": 16.885470085470086,
|
| 103782 |
+
"grad_norm": 0.18427149951457977,
|
| 103783 |
+
"learning_rate": 3.1801765300638526e-06,
|
| 103784 |
+
"loss": 0.6509,
|
| 103785 |
+
"step": 14825
|
| 103786 |
+
},
|
| 103787 |
+
{
|
| 103788 |
+
"epoch": 16.886609686609688,
|
| 103789 |
+
"grad_norm": 0.24386465549468994,
|
| 103790 |
+
"learning_rate": 3.1779028062450673e-06,
|
| 103791 |
+
"loss": 0.7362,
|
| 103792 |
+
"step": 14826
|
| 103793 |
+
},
|
| 103794 |
+
{
|
| 103795 |
+
"epoch": 16.887749287749287,
|
| 103796 |
+
"grad_norm": 0.20113185048103333,
|
| 103797 |
+
"learning_rate": 3.1756298403677164e-06,
|
| 103798 |
+
"loss": 0.5979,
|
| 103799 |
+
"step": 14827
|
| 103800 |
+
},
|
| 103801 |
+
{
|
| 103802 |
+
"epoch": 16.88888888888889,
|
| 103803 |
+
"grad_norm": 0.214745432138443,
|
| 103804 |
+
"learning_rate": 3.173357632510765e-06,
|
| 103805 |
+
"loss": 0.6971,
|
| 103806 |
+
"step": 14828
|
| 103807 |
+
},
|
| 103808 |
+
{
|
| 103809 |
+
"epoch": 16.89002849002849,
|
| 103810 |
+
"grad_norm": 0.21380400657653809,
|
| 103811 |
+
"learning_rate": 3.171086182753133e-06,
|
| 103812 |
+
"loss": 0.7386,
|
| 103813 |
+
"step": 14829
|
| 103814 |
+
},
|
| 103815 |
+
{
|
| 103816 |
+
"epoch": 16.89116809116809,
|
| 103817 |
+
"grad_norm": 0.1792917400598526,
|
| 103818 |
+
"learning_rate": 3.1688154911737015e-06,
|
| 103819 |
+
"loss": 0.7436,
|
| 103820 |
+
"step": 14830
|
| 103821 |
+
},
|
| 103822 |
+
{
|
| 103823 |
+
"epoch": 16.892307692307693,
|
| 103824 |
+
"grad_norm": 0.18266457319259644,
|
| 103825 |
+
"learning_rate": 3.1665455578513415e-06,
|
| 103826 |
+
"loss": 0.7923,
|
| 103827 |
+
"step": 14831
|
| 103828 |
+
},
|
| 103829 |
+
{
|
| 103830 |
+
"epoch": 16.893447293447295,
|
| 103831 |
+
"grad_norm": 0.24912922084331512,
|
| 103832 |
+
"learning_rate": 3.1642763828649003e-06,
|
| 103833 |
+
"loss": 0.5411,
|
| 103834 |
+
"step": 14832
|
| 103835 |
+
},
|
| 103836 |
+
{
|
| 103837 |
+
"epoch": 16.894586894586894,
|
| 103838 |
+
"grad_norm": 0.21742737293243408,
|
| 103839 |
+
"learning_rate": 3.1620079662931927e-06,
|
| 103840 |
+
"loss": 0.6616,
|
| 103841 |
+
"step": 14833
|
| 103842 |
+
},
|
| 103843 |
+
{
|
| 103844 |
+
"epoch": 16.895726495726496,
|
| 103845 |
+
"grad_norm": 0.1927417814731598,
|
| 103846 |
+
"learning_rate": 3.1597403082149953e-06,
|
| 103847 |
+
"loss": 0.5473,
|
| 103848 |
+
"step": 14834
|
| 103849 |
+
},
|
| 103850 |
+
{
|
| 103851 |
+
"epoch": 16.8968660968661,
|
| 103852 |
+
"grad_norm": 0.17637412250041962,
|
| 103853 |
+
"learning_rate": 3.15747340870908e-06,
|
| 103854 |
+
"loss": 0.6818,
|
| 103855 |
+
"step": 14835
|
| 103856 |
+
},
|
| 103857 |
+
{
|
| 103858 |
+
"epoch": 16.898005698005697,
|
| 103859 |
+
"grad_norm": 0.23352162539958954,
|
| 103860 |
+
"learning_rate": 3.155207267854182e-06,
|
| 103861 |
+
"loss": 0.7385,
|
| 103862 |
+
"step": 14836
|
| 103863 |
+
},
|
| 103864 |
+
{
|
| 103865 |
+
"epoch": 16.8991452991453,
|
| 103866 |
+
"grad_norm": 0.17706164717674255,
|
| 103867 |
+
"learning_rate": 3.1529418857290134e-06,
|
| 103868 |
+
"loss": 0.7056,
|
| 103869 |
+
"step": 14837
|
| 103870 |
+
},
|
| 103871 |
+
{
|
| 103872 |
+
"epoch": 16.900284900284902,
|
| 103873 |
+
"grad_norm": 0.17459584772586823,
|
| 103874 |
+
"learning_rate": 3.1506772624122468e-06,
|
| 103875 |
+
"loss": 0.7081,
|
| 103876 |
+
"step": 14838
|
| 103877 |
+
},
|
| 103878 |
+
{
|
| 103879 |
+
"epoch": 16.9014245014245,
|
| 103880 |
+
"grad_norm": 0.2351045459508896,
|
| 103881 |
+
"learning_rate": 3.1484133979825387e-06,
|
| 103882 |
+
"loss": 0.5974,
|
| 103883 |
+
"step": 14839
|
| 103884 |
+
},
|
| 103885 |
+
{
|
| 103886 |
+
"epoch": 16.902564102564103,
|
| 103887 |
+
"grad_norm": 0.29291340708732605,
|
| 103888 |
+
"learning_rate": 3.1461502925185376e-06,
|
| 103889 |
+
"loss": 0.6583,
|
| 103890 |
+
"step": 14840
|
| 103891 |
+
},
|
| 103892 |
+
{
|
| 103893 |
+
"epoch": 16.903703703703705,
|
| 103894 |
+
"grad_norm": 0.24841253459453583,
|
| 103895 |
+
"learning_rate": 3.1438879460988278e-06,
|
| 103896 |
+
"loss": 0.4212,
|
| 103897 |
+
"step": 14841
|
| 103898 |
+
},
|
| 103899 |
+
{
|
| 103900 |
+
"epoch": 16.904843304843304,
|
| 103901 |
+
"grad_norm": 0.1727171242237091,
|
| 103902 |
+
"learning_rate": 3.141626358801997e-06,
|
| 103903 |
+
"loss": 0.8017,
|
| 103904 |
+
"step": 14842
|
| 103905 |
+
},
|
| 103906 |
+
{
|
| 103907 |
+
"epoch": 16.905982905982906,
|
| 103908 |
+
"grad_norm": 0.18711979687213898,
|
| 103909 |
+
"learning_rate": 3.1393655307065894e-06,
|
| 103910 |
+
"loss": 0.6099,
|
| 103911 |
+
"step": 14843
|
| 103912 |
+
},
|
| 103913 |
+
{
|
| 103914 |
+
"epoch": 16.90712250712251,
|
| 103915 |
+
"grad_norm": 0.2179073989391327,
|
| 103916 |
+
"learning_rate": 3.1371054618911433e-06,
|
| 103917 |
+
"loss": 0.6586,
|
| 103918 |
+
"step": 14844
|
| 103919 |
+
},
|
| 103920 |
+
{
|
| 103921 |
+
"epoch": 16.908262108262107,
|
| 103922 |
+
"grad_norm": 0.177495077252388,
|
| 103923 |
+
"learning_rate": 3.13484615243414e-06,
|
| 103924 |
+
"loss": 0.9016,
|
| 103925 |
+
"step": 14845
|
| 103926 |
+
},
|
| 103927 |
+
{
|
| 103928 |
+
"epoch": 16.90940170940171,
|
| 103929 |
+
"grad_norm": 0.21736614406108856,
|
| 103930 |
+
"learning_rate": 3.132587602414061e-06,
|
| 103931 |
+
"loss": 0.6466,
|
| 103932 |
+
"step": 14846
|
| 103933 |
+
},
|
| 103934 |
+
{
|
| 103935 |
+
"epoch": 16.910541310541312,
|
| 103936 |
+
"grad_norm": 0.19747169315814972,
|
| 103937 |
+
"learning_rate": 3.1303298119093493e-06,
|
| 103938 |
+
"loss": 0.6743,
|
| 103939 |
+
"step": 14847
|
| 103940 |
+
},
|
| 103941 |
+
{
|
| 103942 |
+
"epoch": 16.91168091168091,
|
| 103943 |
+
"grad_norm": 0.19041146337985992,
|
| 103944 |
+
"learning_rate": 3.1280727809984313e-06,
|
| 103945 |
+
"loss": 0.5422,
|
| 103946 |
+
"step": 14848
|
| 103947 |
+
},
|
| 103948 |
+
{
|
| 103949 |
+
"epoch": 16.912820512820513,
|
| 103950 |
+
"grad_norm": 0.21531081199645996,
|
| 103951 |
+
"learning_rate": 3.1258165097596855e-06,
|
| 103952 |
+
"loss": 0.6582,
|
| 103953 |
+
"step": 14849
|
| 103954 |
+
},
|
| 103955 |
+
{
|
| 103956 |
+
"epoch": 16.913960113960115,
|
| 103957 |
+
"grad_norm": 0.22194461524486542,
|
| 103958 |
+
"learning_rate": 3.1235609982714823e-06,
|
| 103959 |
+
"loss": 0.6973,
|
| 103960 |
+
"step": 14850
|
| 103961 |
+
},
|
| 103962 |
+
{
|
| 103963 |
+
"epoch": 16.915099715099714,
|
| 103964 |
+
"grad_norm": 0.1573968082666397,
|
| 103965 |
+
"learning_rate": 3.1213062466121796e-06,
|
| 103966 |
+
"loss": 0.8643,
|
| 103967 |
+
"step": 14851
|
| 103968 |
+
},
|
| 103969 |
+
{
|
| 103970 |
+
"epoch": 16.916239316239317,
|
| 103971 |
+
"grad_norm": 0.1980697512626648,
|
| 103972 |
+
"learning_rate": 3.1190522548600694e-06,
|
| 103973 |
+
"loss": 0.5669,
|
| 103974 |
+
"step": 14852
|
| 103975 |
+
},
|
| 103976 |
+
{
|
| 103977 |
+
"epoch": 16.91737891737892,
|
| 103978 |
+
"grad_norm": 0.1801229864358902,
|
| 103979 |
+
"learning_rate": 3.1167990230934475e-06,
|
| 103980 |
+
"loss": 0.8694,
|
| 103981 |
+
"step": 14853
|
| 103982 |
+
},
|
| 103983 |
+
{
|
| 103984 |
+
"epoch": 16.918518518518518,
|
| 103985 |
+
"grad_norm": 0.18354345858097076,
|
| 103986 |
+
"learning_rate": 3.114546551390576e-06,
|
| 103987 |
+
"loss": 0.6403,
|
| 103988 |
+
"step": 14854
|
| 103989 |
+
},
|
| 103990 |
+
{
|
| 103991 |
+
"epoch": 16.91965811965812,
|
| 103992 |
+
"grad_norm": 0.15931031107902527,
|
| 103993 |
+
"learning_rate": 3.1122948398296906e-06,
|
| 103994 |
+
"loss": 0.5668,
|
| 103995 |
+
"step": 14855
|
| 103996 |
+
},
|
| 103997 |
+
{
|
| 103998 |
+
"epoch": 16.920797720797722,
|
| 103999 |
+
"grad_norm": 0.204697385430336,
|
| 104000 |
+
"learning_rate": 3.110043888488995e-06,
|
| 104001 |
+
"loss": 0.8192,
|
| 104002 |
+
"step": 14856
|
| 104003 |
+
},
|
| 104004 |
+
{
|
| 104005 |
+
"epoch": 16.92193732193732,
|
| 104006 |
+
"grad_norm": 0.2309281975030899,
|
| 104007 |
+
"learning_rate": 3.1077936974466706e-06,
|
| 104008 |
+
"loss": 0.5979,
|
| 104009 |
+
"step": 14857
|
| 104010 |
+
},
|
| 104011 |
+
{
|
| 104012 |
+
"epoch": 16.923076923076923,
|
| 104013 |
+
"grad_norm": 0.19758395850658417,
|
| 104014 |
+
"learning_rate": 3.105544266780874e-06,
|
| 104015 |
+
"loss": 0.8186,
|
| 104016 |
+
"step": 14858
|
| 104017 |
+
},
|
| 104018 |
+
{
|
| 104019 |
+
"epoch": 16.924216524216526,
|
| 104020 |
+
"grad_norm": 0.21037371456623077,
|
| 104021 |
+
"learning_rate": 3.103295596569744e-06,
|
| 104022 |
+
"loss": 0.5528,
|
| 104023 |
+
"step": 14859
|
| 104024 |
+
},
|
| 104025 |
+
{
|
| 104026 |
+
"epoch": 16.925356125356124,
|
| 104027 |
+
"grad_norm": 0.20761847496032715,
|
| 104028 |
+
"learning_rate": 3.1010476868913645e-06,
|
| 104029 |
+
"loss": 0.7354,
|
| 104030 |
+
"step": 14860
|
| 104031 |
+
},
|
| 104032 |
+
{
|
| 104033 |
+
"epoch": 16.926495726495727,
|
| 104034 |
+
"grad_norm": 0.1998409628868103,
|
| 104035 |
+
"learning_rate": 3.098800537823818e-06,
|
| 104036 |
+
"loss": 0.7765,
|
| 104037 |
+
"step": 14861
|
| 104038 |
+
},
|
| 104039 |
+
{
|
| 104040 |
+
"epoch": 16.92763532763533,
|
| 104041 |
+
"grad_norm": 0.18831759691238403,
|
| 104042 |
+
"learning_rate": 3.0965541494451634e-06,
|
| 104043 |
+
"loss": 0.6404,
|
| 104044 |
+
"step": 14862
|
| 104045 |
+
},
|
| 104046 |
+
{
|
| 104047 |
+
"epoch": 16.928774928774928,
|
| 104048 |
+
"grad_norm": 0.1787118762731552,
|
| 104049 |
+
"learning_rate": 3.094308521833422e-06,
|
| 104050 |
+
"loss": 0.7494,
|
| 104051 |
+
"step": 14863
|
| 104052 |
+
},
|
| 104053 |
+
{
|
| 104054 |
+
"epoch": 16.92991452991453,
|
| 104055 |
+
"grad_norm": 0.2410799264907837,
|
| 104056 |
+
"learning_rate": 3.0920636550665823e-06,
|
| 104057 |
+
"loss": 0.6113,
|
| 104058 |
+
"step": 14864
|
| 104059 |
+
},
|
| 104060 |
+
{
|
| 104061 |
+
"epoch": 16.931054131054132,
|
| 104062 |
+
"grad_norm": 0.18928298354148865,
|
| 104063 |
+
"learning_rate": 3.089819549222617e-06,
|
| 104064 |
+
"loss": 0.557,
|
| 104065 |
+
"step": 14865
|
| 104066 |
+
},
|
| 104067 |
+
{
|
| 104068 |
+
"epoch": 16.93219373219373,
|
| 104069 |
+
"grad_norm": 0.1769939512014389,
|
| 104070 |
+
"learning_rate": 3.087576204379475e-06,
|
| 104071 |
+
"loss": 0.7215,
|
| 104072 |
+
"step": 14866
|
| 104073 |
+
},
|
| 104074 |
+
{
|
| 104075 |
+
"epoch": 16.933333333333334,
|
| 104076 |
+
"grad_norm": 0.20230601727962494,
|
| 104077 |
+
"learning_rate": 3.0853336206150716e-06,
|
| 104078 |
+
"loss": 0.4905,
|
| 104079 |
+
"step": 14867
|
| 104080 |
+
},
|
| 104081 |
+
{
|
| 104082 |
+
"epoch": 16.934472934472936,
|
| 104083 |
+
"grad_norm": 2.089182138442993,
|
| 104084 |
+
"learning_rate": 3.0830917980072954e-06,
|
| 104085 |
+
"loss": 0.6583,
|
| 104086 |
+
"step": 14868
|
| 104087 |
+
},
|
| 104088 |
+
{
|
| 104089 |
+
"epoch": 16.935612535612535,
|
| 104090 |
+
"grad_norm": 0.16396862268447876,
|
| 104091 |
+
"learning_rate": 3.080850736634011e-06,
|
| 104092 |
+
"loss": 0.8526,
|
| 104093 |
+
"step": 14869
|
| 104094 |
+
},
|
| 104095 |
+
{
|
| 104096 |
+
"epoch": 16.936752136752137,
|
| 104097 |
+
"grad_norm": 0.18934102356433868,
|
| 104098 |
+
"learning_rate": 3.0786104365730595e-06,
|
| 104099 |
+
"loss": 0.7467,
|
| 104100 |
+
"step": 14870
|
| 104101 |
+
},
|
| 104102 |
+
{
|
| 104103 |
+
"epoch": 16.93789173789174,
|
| 104104 |
+
"grad_norm": 0.21667882800102234,
|
| 104105 |
+
"learning_rate": 3.0763708979022587e-06,
|
| 104106 |
+
"loss": 0.5795,
|
| 104107 |
+
"step": 14871
|
| 104108 |
+
},
|
| 104109 |
+
{
|
| 104110 |
+
"epoch": 16.939031339031338,
|
| 104111 |
+
"grad_norm": 0.1986517608165741,
|
| 104112 |
+
"learning_rate": 3.0741321206993718e-06,
|
| 104113 |
+
"loss": 0.6223,
|
| 104114 |
+
"step": 14872
|
| 104115 |
+
},
|
| 104116 |
+
{
|
| 104117 |
+
"epoch": 16.94017094017094,
|
| 104118 |
+
"grad_norm": 0.2782285809516907,
|
| 104119 |
+
"learning_rate": 3.071894105042181e-06,
|
| 104120 |
+
"loss": 0.6218,
|
| 104121 |
+
"step": 14873
|
| 104122 |
+
},
|
| 104123 |
+
{
|
| 104124 |
+
"epoch": 16.941310541310543,
|
| 104125 |
+
"grad_norm": 0.18464824557304382,
|
| 104126 |
+
"learning_rate": 3.069656851008415e-06,
|
| 104127 |
+
"loss": 0.5569,
|
| 104128 |
+
"step": 14874
|
| 104129 |
+
},
|
| 104130 |
+
{
|
| 104131 |
+
"epoch": 16.94245014245014,
|
| 104132 |
+
"grad_norm": 0.20595508813858032,
|
| 104133 |
+
"learning_rate": 3.067420358675771e-06,
|
| 104134 |
+
"loss": 0.5863,
|
| 104135 |
+
"step": 14875
|
| 104136 |
+
},
|
| 104137 |
+
{
|
| 104138 |
+
"epoch": 16.943589743589744,
|
| 104139 |
+
"grad_norm": 0.23332270979881287,
|
| 104140 |
+
"learning_rate": 3.065184628121931e-06,
|
| 104141 |
+
"loss": 0.693,
|
| 104142 |
+
"step": 14876
|
| 104143 |
+
},
|
| 104144 |
+
{
|
| 104145 |
+
"epoch": 16.944729344729346,
|
| 104146 |
+
"grad_norm": 0.22465844452381134,
|
| 104147 |
+
"learning_rate": 3.062949659424552e-06,
|
| 104148 |
+
"loss": 0.6365,
|
| 104149 |
+
"step": 14877
|
| 104150 |
+
},
|
| 104151 |
+
{
|
| 104152 |
+
"epoch": 16.945868945868945,
|
| 104153 |
+
"grad_norm": 0.209318146109581,
|
| 104154 |
+
"learning_rate": 3.060715452661261e-06,
|
| 104155 |
+
"loss": 0.537,
|
| 104156 |
+
"step": 14878
|
| 104157 |
+
},
|
| 104158 |
+
{
|
| 104159 |
+
"epoch": 16.947008547008547,
|
| 104160 |
+
"grad_norm": 0.21809899806976318,
|
| 104161 |
+
"learning_rate": 3.058482007909652e-06,
|
| 104162 |
+
"loss": 0.5206,
|
| 104163 |
+
"step": 14879
|
| 104164 |
+
},
|
| 104165 |
+
{
|
| 104166 |
+
"epoch": 16.94814814814815,
|
| 104167 |
+
"grad_norm": 0.20825175940990448,
|
| 104168 |
+
"learning_rate": 3.056249325247301e-06,
|
| 104169 |
+
"loss": 0.6252,
|
| 104170 |
+
"step": 14880
|
| 104171 |
+
},
|
| 104172 |
+
{
|
| 104173 |
+
"epoch": 16.94928774928775,
|
| 104174 |
+
"grad_norm": 0.23183006048202515,
|
| 104175 |
+
"learning_rate": 3.0540174047517578e-06,
|
| 104176 |
+
"loss": 0.5467,
|
| 104177 |
+
"step": 14881
|
| 104178 |
+
},
|
| 104179 |
+
{
|
| 104180 |
+
"epoch": 16.95042735042735,
|
| 104181 |
+
"grad_norm": 0.1657174676656723,
|
| 104182 |
+
"learning_rate": 3.0517862465005486e-06,
|
| 104183 |
+
"loss": 0.7303,
|
| 104184 |
+
"step": 14882
|
| 104185 |
+
},
|
| 104186 |
+
{
|
| 104187 |
+
"epoch": 16.951566951566953,
|
| 104188 |
+
"grad_norm": 0.23311227560043335,
|
| 104189 |
+
"learning_rate": 3.049555850571148e-06,
|
| 104190 |
+
"loss": 0.7206,
|
| 104191 |
+
"step": 14883
|
| 104192 |
+
},
|
| 104193 |
+
{
|
| 104194 |
+
"epoch": 16.95270655270655,
|
| 104195 |
+
"grad_norm": 0.2159399688243866,
|
| 104196 |
+
"learning_rate": 3.047326217041041e-06,
|
| 104197 |
+
"loss": 0.7392,
|
| 104198 |
+
"step": 14884
|
| 104199 |
+
},
|
| 104200 |
+
{
|
| 104201 |
+
"epoch": 16.953846153846154,
|
| 104202 |
+
"grad_norm": 0.18478034436702728,
|
| 104203 |
+
"learning_rate": 3.045097345987671e-06,
|
| 104204 |
+
"loss": 0.6682,
|
| 104205 |
+
"step": 14885
|
| 104206 |
+
},
|
| 104207 |
+
{
|
| 104208 |
+
"epoch": 16.954985754985756,
|
| 104209 |
+
"grad_norm": 0.22123868763446808,
|
| 104210 |
+
"learning_rate": 3.042869237488444e-06,
|
| 104211 |
+
"loss": 0.5325,
|
| 104212 |
+
"step": 14886
|
| 104213 |
+
},
|
| 104214 |
+
{
|
| 104215 |
+
"epoch": 16.956125356125355,
|
| 104216 |
+
"grad_norm": 0.21874210238456726,
|
| 104217 |
+
"learning_rate": 3.040641891620746e-06,
|
| 104218 |
+
"loss": 0.7025,
|
| 104219 |
+
"step": 14887
|
| 104220 |
+
},
|
| 104221 |
+
{
|
| 104222 |
+
"epoch": 16.957264957264957,
|
| 104223 |
+
"grad_norm": 0.19596925377845764,
|
| 104224 |
+
"learning_rate": 3.0384153084619477e-06,
|
| 104225 |
+
"loss": 0.7079,
|
| 104226 |
+
"step": 14888
|
| 104227 |
+
},
|
| 104228 |
+
{
|
| 104229 |
+
"epoch": 16.95840455840456,
|
| 104230 |
+
"grad_norm": 0.17991815507411957,
|
| 104231 |
+
"learning_rate": 3.0361894880893834e-06,
|
| 104232 |
+
"loss": 0.582,
|
| 104233 |
+
"step": 14889
|
| 104234 |
+
},
|
| 104235 |
+
{
|
| 104236 |
+
"epoch": 16.95954415954416,
|
| 104237 |
+
"grad_norm": 0.2364872395992279,
|
| 104238 |
+
"learning_rate": 3.0339644305803523e-06,
|
| 104239 |
+
"loss": 0.641,
|
| 104240 |
+
"step": 14890
|
| 104241 |
+
},
|
| 104242 |
+
{
|
| 104243 |
+
"epoch": 16.96068376068376,
|
| 104244 |
+
"grad_norm": 0.2232533097267151,
|
| 104245 |
+
"learning_rate": 3.0317401360121456e-06,
|
| 104246 |
+
"loss": 0.6508,
|
| 104247 |
+
"step": 14891
|
| 104248 |
+
},
|
| 104249 |
+
{
|
| 104250 |
+
"epoch": 16.961823361823363,
|
| 104251 |
+
"grad_norm": 0.21058550477027893,
|
| 104252 |
+
"learning_rate": 3.0295166044620154e-06,
|
| 104253 |
+
"loss": 0.6476,
|
| 104254 |
+
"step": 14892
|
| 104255 |
+
},
|
| 104256 |
+
{
|
| 104257 |
+
"epoch": 16.962962962962962,
|
| 104258 |
+
"grad_norm": 0.16185157001018524,
|
| 104259 |
+
"learning_rate": 3.0272938360072e-06,
|
| 104260 |
+
"loss": 0.6307,
|
| 104261 |
+
"step": 14893
|
| 104262 |
+
},
|
| 104263 |
+
{
|
| 104264 |
+
"epoch": 16.964102564102564,
|
| 104265 |
+
"grad_norm": 0.24551266431808472,
|
| 104266 |
+
"learning_rate": 3.025071830724882e-06,
|
| 104267 |
+
"loss": 0.4534,
|
| 104268 |
+
"step": 14894
|
| 104269 |
+
},
|
| 104270 |
+
{
|
| 104271 |
+
"epoch": 16.965242165242167,
|
| 104272 |
+
"grad_norm": 0.21784427762031555,
|
| 104273 |
+
"learning_rate": 3.0228505886922582e-06,
|
| 104274 |
+
"loss": 0.6538,
|
| 104275 |
+
"step": 14895
|
| 104276 |
+
},
|
| 104277 |
+
{
|
| 104278 |
+
"epoch": 16.966381766381765,
|
| 104279 |
+
"grad_norm": 0.6637478470802307,
|
| 104280 |
+
"learning_rate": 3.0206301099864747e-06,
|
| 104281 |
+
"loss": 0.7642,
|
| 104282 |
+
"step": 14896
|
| 104283 |
+
},
|
| 104284 |
+
{
|
| 104285 |
+
"epoch": 16.967521367521368,
|
| 104286 |
+
"grad_norm": 0.1842934638261795,
|
| 104287 |
+
"learning_rate": 3.0184103946846453e-06,
|
| 104288 |
+
"loss": 0.8049,
|
| 104289 |
+
"step": 14897
|
| 104290 |
+
},
|
| 104291 |
+
{
|
| 104292 |
+
"epoch": 16.96866096866097,
|
| 104293 |
+
"grad_norm": 0.19058462977409363,
|
| 104294 |
+
"learning_rate": 3.016191442863872e-06,
|
| 104295 |
+
"loss": 0.7098,
|
| 104296 |
+
"step": 14898
|
| 104297 |
+
},
|
| 104298 |
+
{
|
| 104299 |
+
"epoch": 16.96980056980057,
|
| 104300 |
+
"grad_norm": 0.18843884766101837,
|
| 104301 |
+
"learning_rate": 3.0139732546012266e-06,
|
| 104302 |
+
"loss": 0.6843,
|
| 104303 |
+
"step": 14899
|
| 104304 |
+
},
|
| 104305 |
+
{
|
| 104306 |
+
"epoch": 16.97094017094017,
|
| 104307 |
+
"grad_norm": 0.20854750275611877,
|
| 104308 |
+
"learning_rate": 3.0117558299737587e-06,
|
| 104309 |
+
"loss": 0.6831,
|
| 104310 |
+
"step": 14900
|
| 104311 |
+
},
|
| 104312 |
+
{
|
| 104313 |
+
"epoch": 16.972079772079773,
|
| 104314 |
+
"grad_norm": 0.20695297420024872,
|
| 104315 |
+
"learning_rate": 3.00953916905847e-06,
|
| 104316 |
+
"loss": 0.6184,
|
| 104317 |
+
"step": 14901
|
| 104318 |
+
},
|
| 104319 |
+
{
|
| 104320 |
+
"epoch": 16.973219373219372,
|
| 104321 |
+
"grad_norm": 0.1774614304304123,
|
| 104322 |
+
"learning_rate": 3.0073232719323637e-06,
|
| 104323 |
+
"loss": 0.8779,
|
| 104324 |
+
"step": 14902
|
| 104325 |
+
},
|
| 104326 |
+
{
|
| 104327 |
+
"epoch": 16.974358974358974,
|
| 104328 |
+
"grad_norm": 0.2092989981174469,
|
| 104329 |
+
"learning_rate": 3.0051081386723967e-06,
|
| 104330 |
+
"loss": 0.7488,
|
| 104331 |
+
"step": 14903
|
| 104332 |
+
},
|
| 104333 |
+
{
|
| 104334 |
+
"epoch": 16.975498575498577,
|
| 104335 |
+
"grad_norm": 0.19810225069522858,
|
| 104336 |
+
"learning_rate": 3.0028937693555195e-06,
|
| 104337 |
+
"loss": 0.6228,
|
| 104338 |
+
"step": 14904
|
| 104339 |
+
},
|
| 104340 |
+
{
|
| 104341 |
+
"epoch": 16.976638176638176,
|
| 104342 |
+
"grad_norm": 0.17616690695285797,
|
| 104343 |
+
"learning_rate": 3.0006801640586197e-06,
|
| 104344 |
+
"loss": 0.5429,
|
| 104345 |
+
"step": 14905
|
| 104346 |
+
},
|
| 104347 |
+
{
|
| 104348 |
+
"epoch": 16.977777777777778,
|
| 104349 |
+
"grad_norm": 0.17729952931404114,
|
| 104350 |
+
"learning_rate": 2.998467322858603e-06,
|
| 104351 |
+
"loss": 0.6216,
|
| 104352 |
+
"step": 14906
|
| 104353 |
+
},
|
| 104354 |
+
{
|
| 104355 |
+
"epoch": 16.97891737891738,
|
| 104356 |
+
"grad_norm": 0.16273464262485504,
|
| 104357 |
+
"learning_rate": 2.996255245832319e-06,
|
| 104358 |
+
"loss": 1.0133,
|
| 104359 |
+
"step": 14907
|
| 104360 |
+
},
|
| 104361 |
+
{
|
| 104362 |
+
"epoch": 16.98005698005698,
|
| 104363 |
+
"grad_norm": 0.20543445646762848,
|
| 104364 |
+
"learning_rate": 2.994043933056609e-06,
|
| 104365 |
+
"loss": 0.9106,
|
| 104366 |
+
"step": 14908
|
| 104367 |
+
},
|
| 104368 |
+
{
|
| 104369 |
+
"epoch": 16.98119658119658,
|
| 104370 |
+
"grad_norm": 0.18109145760536194,
|
| 104371 |
+
"learning_rate": 2.991833384608264e-06,
|
| 104372 |
+
"loss": 0.7002,
|
| 104373 |
+
"step": 14909
|
| 104374 |
+
},
|
| 104375 |
+
{
|
| 104376 |
+
"epoch": 16.982336182336184,
|
| 104377 |
+
"grad_norm": 0.23538149893283844,
|
| 104378 |
+
"learning_rate": 2.989623600564068e-06,
|
| 104379 |
+
"loss": 0.608,
|
| 104380 |
+
"step": 14910
|
| 104381 |
+
},
|
| 104382 |
+
{
|
| 104383 |
+
"epoch": 16.983475783475782,
|
| 104384 |
+
"grad_norm": 0.19944344460964203,
|
| 104385 |
+
"learning_rate": 2.987414581000772e-06,
|
| 104386 |
+
"loss": 0.5037,
|
| 104387 |
+
"step": 14911
|
| 104388 |
+
},
|
| 104389 |
+
{
|
| 104390 |
+
"epoch": 16.984615384615385,
|
| 104391 |
+
"grad_norm": 0.2201206386089325,
|
| 104392 |
+
"learning_rate": 2.9852063259951072e-06,
|
| 104393 |
+
"loss": 0.5791,
|
| 104394 |
+
"step": 14912
|
| 104395 |
+
},
|
| 104396 |
+
{
|
| 104397 |
+
"epoch": 16.985754985754987,
|
| 104398 |
+
"grad_norm": 0.17543230950832367,
|
| 104399 |
+
"learning_rate": 2.9829988356237624e-06,
|
| 104400 |
+
"loss": 0.7668,
|
| 104401 |
+
"step": 14913
|
| 104402 |
+
},
|
| 104403 |
+
{
|
| 104404 |
+
"epoch": 16.986894586894586,
|
| 104405 |
+
"grad_norm": 0.24056994915008545,
|
| 104406 |
+
"learning_rate": 2.9807921099634178e-06,
|
| 104407 |
+
"loss": 0.5446,
|
| 104408 |
+
"step": 14914
|
| 104409 |
+
},
|
| 104410 |
+
{
|
| 104411 |
+
"epoch": 16.988034188034188,
|
| 104412 |
+
"grad_norm": 0.1986328512430191,
|
| 104413 |
+
"learning_rate": 2.9785861490907145e-06,
|
| 104414 |
+
"loss": 0.4784,
|
| 104415 |
+
"step": 14915
|
| 104416 |
+
},
|
| 104417 |
+
{
|
| 104418 |
+
"epoch": 16.98917378917379,
|
| 104419 |
+
"grad_norm": 0.17287662625312805,
|
| 104420 |
+
"learning_rate": 2.976380953082272e-06,
|
| 104421 |
+
"loss": 0.7429,
|
| 104422 |
+
"step": 14916
|
| 104423 |
+
},
|
| 104424 |
+
{
|
| 104425 |
+
"epoch": 16.99031339031339,
|
| 104426 |
+
"grad_norm": 0.185201957821846,
|
| 104427 |
+
"learning_rate": 2.974176522014685e-06,
|
| 104428 |
+
"loss": 0.6023,
|
| 104429 |
+
"step": 14917
|
| 104430 |
+
},
|
| 104431 |
+
{
|
| 104432 |
+
"epoch": 16.99145299145299,
|
| 104433 |
+
"grad_norm": 0.17165346443653107,
|
| 104434 |
+
"learning_rate": 2.9719728559645194e-06,
|
| 104435 |
+
"loss": 0.5681,
|
| 104436 |
+
"step": 14918
|
| 104437 |
+
},
|
| 104438 |
+
{
|
| 104439 |
+
"epoch": 16.992592592592594,
|
| 104440 |
+
"grad_norm": 0.16597877442836761,
|
| 104441 |
+
"learning_rate": 2.96976995500832e-06,
|
| 104442 |
+
"loss": 0.5537,
|
| 104443 |
+
"step": 14919
|
| 104444 |
+
},
|
| 104445 |
+
{
|
| 104446 |
+
"epoch": 16.993732193732193,
|
| 104447 |
+
"grad_norm": 0.1753251552581787,
|
| 104448 |
+
"learning_rate": 2.967567819222586e-06,
|
| 104449 |
+
"loss": 0.7084,
|
| 104450 |
+
"step": 14920
|
| 104451 |
+
},
|
| 104452 |
+
{
|
| 104453 |
+
"epoch": 16.994871794871795,
|
| 104454 |
+
"grad_norm": 0.16678474843502045,
|
| 104455 |
+
"learning_rate": 2.965366448683812e-06,
|
| 104456 |
+
"loss": 0.6858,
|
| 104457 |
+
"step": 14921
|
| 104458 |
+
},
|
| 104459 |
+
{
|
| 104460 |
+
"epoch": 16.996011396011397,
|
| 104461 |
+
"grad_norm": 0.35645949840545654,
|
| 104462 |
+
"learning_rate": 2.9631658434684572e-06,
|
| 104463 |
+
"loss": 0.8283,
|
| 104464 |
+
"step": 14922
|
| 104465 |
+
},
|
| 104466 |
+
{
|
| 104467 |
+
"epoch": 16.997150997150996,
|
| 104468 |
+
"grad_norm": 0.15596824884414673,
|
| 104469 |
+
"learning_rate": 2.9609660036529596e-06,
|
| 104470 |
+
"loss": 0.8399,
|
| 104471 |
+
"step": 14923
|
| 104472 |
+
},
|
| 104473 |
+
{
|
| 104474 |
+
"epoch": 16.9982905982906,
|
| 104475 |
+
"grad_norm": 0.20432667434215546,
|
| 104476 |
+
"learning_rate": 2.9587669293137136e-06,
|
| 104477 |
+
"loss": 0.8565,
|
| 104478 |
+
"step": 14924
|
| 104479 |
+
},
|
| 104480 |
+
{
|
| 104481 |
+
"epoch": 16.9994301994302,
|
| 104482 |
+
"grad_norm": 0.179723858833313,
|
| 104483 |
+
"learning_rate": 2.956568620527106e-06,
|
| 104484 |
+
"loss": 0.6696,
|
| 104485 |
+
"step": 14925
|
| 104486 |
+
},
|
| 104487 |
+
{
|
| 104488 |
+
"epoch": 17.0,
|
| 104489 |
+
"grad_norm": 0.2801351845264435,
|
| 104490 |
+
"learning_rate": 2.9543710773694915e-06,
|
| 104491 |
+
"loss": 0.7652,
|
| 104492 |
+
"step": 14926
|
| 104493 |
+
},
|
| 104494 |
+
{
|
| 104495 |
+
"epoch": 17.001139601139602,
|
| 104496 |
+
"grad_norm": 0.19512665271759033,
|
| 104497 |
+
"learning_rate": 2.952174299917193e-06,
|
| 104498 |
+
"loss": 0.6563,
|
| 104499 |
+
"step": 14927
|
| 104500 |
+
},
|
| 104501 |
+
{
|
| 104502 |
+
"epoch": 17.0022792022792,
|
| 104503 |
+
"grad_norm": 0.25168266892433167,
|
| 104504 |
+
"learning_rate": 2.949978288246516e-06,
|
| 104505 |
+
"loss": 0.6191,
|
| 104506 |
+
"step": 14928
|
| 104507 |
+
},
|
| 104508 |
+
{
|
| 104509 |
+
"epoch": 17.003418803418803,
|
| 104510 |
+
"grad_norm": 0.25868624448776245,
|
| 104511 |
+
"learning_rate": 2.947783042433727e-06,
|
| 104512 |
+
"loss": 0.3627,
|
| 104513 |
+
"step": 14929
|
| 104514 |
+
},
|
| 104515 |
+
{
|
| 104516 |
+
"epoch": 17.004558404558406,
|
| 104517 |
+
"grad_norm": 0.23300258815288544,
|
| 104518 |
+
"learning_rate": 2.945588562555085e-06,
|
| 104519 |
+
"loss": 0.5863,
|
| 104520 |
+
"step": 14930
|
| 104521 |
+
},
|
| 104522 |
+
{
|
| 104523 |
+
"epoch": 17.005698005698004,
|
| 104524 |
+
"grad_norm": 0.19605261087417603,
|
| 104525 |
+
"learning_rate": 2.943394848686795e-06,
|
| 104526 |
+
"loss": 0.6877,
|
| 104527 |
+
"step": 14931
|
| 104528 |
+
},
|
| 104529 |
+
{
|
| 104530 |
+
"epoch": 17.006837606837607,
|
| 104531 |
+
"grad_norm": 0.24441637098789215,
|
| 104532 |
+
"learning_rate": 2.9412019009050606e-06,
|
| 104533 |
+
"loss": 0.4629,
|
| 104534 |
+
"step": 14932
|
| 104535 |
+
},
|
| 104536 |
+
{
|
| 104537 |
+
"epoch": 17.00797720797721,
|
| 104538 |
+
"grad_norm": 0.19533565640449524,
|
| 104539 |
+
"learning_rate": 2.9390097192860425e-06,
|
| 104540 |
+
"loss": 0.8337,
|
| 104541 |
+
"step": 14933
|
| 104542 |
+
},
|
| 104543 |
+
{
|
| 104544 |
+
"epoch": 17.009116809116808,
|
| 104545 |
+
"grad_norm": 0.16391415894031525,
|
| 104546 |
+
"learning_rate": 2.9368183039058916e-06,
|
| 104547 |
+
"loss": 1.0077,
|
| 104548 |
+
"step": 14934
|
| 104549 |
+
},
|
| 104550 |
+
{
|
| 104551 |
+
"epoch": 17.01025641025641,
|
| 104552 |
+
"grad_norm": 0.16765032708644867,
|
| 104553 |
+
"learning_rate": 2.9346276548407102e-06,
|
| 104554 |
+
"loss": 0.649,
|
| 104555 |
+
"step": 14935
|
| 104556 |
+
},
|
| 104557 |
+
{
|
| 104558 |
+
"epoch": 17.011396011396013,
|
| 104559 |
+
"grad_norm": 0.18843749165534973,
|
| 104560 |
+
"learning_rate": 2.932437772166591e-06,
|
| 104561 |
+
"loss": 0.7146,
|
| 104562 |
+
"step": 14936
|
| 104563 |
+
},
|
| 104564 |
+
{
|
| 104565 |
+
"epoch": 17.01253561253561,
|
| 104566 |
+
"grad_norm": 0.1859249472618103,
|
| 104567 |
+
"learning_rate": 2.9302486559595917e-06,
|
| 104568 |
+
"loss": 0.5675,
|
| 104569 |
+
"step": 14937
|
| 104570 |
+
},
|
| 104571 |
+
{
|
| 104572 |
+
"epoch": 17.013675213675214,
|
| 104573 |
+
"grad_norm": 0.20158928632736206,
|
| 104574 |
+
"learning_rate": 2.928060306295749e-06,
|
| 104575 |
+
"loss": 0.6998,
|
| 104576 |
+
"step": 14938
|
| 104577 |
+
},
|
| 104578 |
+
{
|
| 104579 |
+
"epoch": 17.014814814814816,
|
| 104580 |
+
"grad_norm": 0.1976604163646698,
|
| 104581 |
+
"learning_rate": 2.925872723251072e-06,
|
| 104582 |
+
"loss": 0.5616,
|
| 104583 |
+
"step": 14939
|
| 104584 |
+
},
|
| 104585 |
+
{
|
| 104586 |
+
"epoch": 17.015954415954415,
|
| 104587 |
+
"grad_norm": 0.17264212667942047,
|
| 104588 |
+
"learning_rate": 2.9236859069015382e-06,
|
| 104589 |
+
"loss": 0.4989,
|
| 104590 |
+
"step": 14940
|
| 104591 |
+
},
|
| 104592 |
+
{
|
| 104593 |
+
"epoch": 17.017094017094017,
|
| 104594 |
+
"grad_norm": 0.17468909919261932,
|
| 104595 |
+
"learning_rate": 2.9214998573231013e-06,
|
| 104596 |
+
"loss": 0.6128,
|
| 104597 |
+
"step": 14941
|
| 104598 |
+
},
|
| 104599 |
+
{
|
| 104600 |
+
"epoch": 17.01823361823362,
|
| 104601 |
+
"grad_norm": 0.1987021416425705,
|
| 104602 |
+
"learning_rate": 2.9193145745916973e-06,
|
| 104603 |
+
"loss": 0.7211,
|
| 104604 |
+
"step": 14942
|
| 104605 |
+
},
|
| 104606 |
+
{
|
| 104607 |
+
"epoch": 17.019373219373218,
|
| 104608 |
+
"grad_norm": 0.2015351802110672,
|
| 104609 |
+
"learning_rate": 2.9171300587832158e-06,
|
| 104610 |
+
"loss": 0.6194,
|
| 104611 |
+
"step": 14943
|
| 104612 |
+
},
|
| 104613 |
+
{
|
| 104614 |
+
"epoch": 17.02051282051282,
|
| 104615 |
+
"grad_norm": 0.16504210233688354,
|
| 104616 |
+
"learning_rate": 2.9149463099735348e-06,
|
| 104617 |
+
"loss": 0.636,
|
| 104618 |
+
"step": 14944
|
| 104619 |
+
},
|
| 104620 |
+
{
|
| 104621 |
+
"epoch": 17.021652421652423,
|
| 104622 |
+
"grad_norm": 0.20216158032417297,
|
| 104623 |
+
"learning_rate": 2.912763328238502e-06,
|
| 104624 |
+
"loss": 0.7022,
|
| 104625 |
+
"step": 14945
|
| 104626 |
+
},
|
| 104627 |
+
{
|
| 104628 |
+
"epoch": 17.02279202279202,
|
| 104629 |
+
"grad_norm": 0.18127597868442535,
|
| 104630 |
+
"learning_rate": 2.9105811136539456e-06,
|
| 104631 |
+
"loss": 0.6556,
|
| 104632 |
+
"step": 14946
|
| 104633 |
+
},
|
| 104634 |
+
{
|
| 104635 |
+
"epoch": 17.023931623931624,
|
| 104636 |
+
"grad_norm": 0.17371995747089386,
|
| 104637 |
+
"learning_rate": 2.908399666295647e-06,
|
| 104638 |
+
"loss": 0.5179,
|
| 104639 |
+
"step": 14947
|
| 104640 |
+
},
|
| 104641 |
+
{
|
| 104642 |
+
"epoch": 17.025071225071226,
|
| 104643 |
+
"grad_norm": 0.19458843767642975,
|
| 104644 |
+
"learning_rate": 2.906218986239381e-06,
|
| 104645 |
+
"loss": 0.8547,
|
| 104646 |
+
"step": 14948
|
| 104647 |
+
},
|
| 104648 |
+
{
|
| 104649 |
+
"epoch": 17.026210826210825,
|
| 104650 |
+
"grad_norm": 0.2290373146533966,
|
| 104651 |
+
"learning_rate": 2.904039073560885e-06,
|
| 104652 |
+
"loss": 0.5221,
|
| 104653 |
+
"step": 14949
|
| 104654 |
+
},
|
| 104655 |
+
{
|
| 104656 |
+
"epoch": 17.027350427350427,
|
| 104657 |
+
"grad_norm": 0.17556844651699066,
|
| 104658 |
+
"learning_rate": 2.9018599283358787e-06,
|
| 104659 |
+
"loss": 0.6471,
|
| 104660 |
+
"step": 14950
|
| 104661 |
+
},
|
| 104662 |
+
{
|
| 104663 |
+
"epoch": 17.02849002849003,
|
| 104664 |
+
"grad_norm": 0.23710325360298157,
|
| 104665 |
+
"learning_rate": 2.8996815506400458e-06,
|
| 104666 |
+
"loss": 0.5864,
|
| 104667 |
+
"step": 14951
|
| 104668 |
+
},
|
| 104669 |
+
{
|
| 104670 |
+
"epoch": 17.02962962962963,
|
| 104671 |
+
"grad_norm": 0.2378053218126297,
|
| 104672 |
+
"learning_rate": 2.897503940549051e-06,
|
| 104673 |
+
"loss": 0.4265,
|
| 104674 |
+
"step": 14952
|
| 104675 |
+
},
|
| 104676 |
+
{
|
| 104677 |
+
"epoch": 17.03076923076923,
|
| 104678 |
+
"grad_norm": 0.21921874582767487,
|
| 104679 |
+
"learning_rate": 2.8953270981385313e-06,
|
| 104680 |
+
"loss": 0.7231,
|
| 104681 |
+
"step": 14953
|
| 104682 |
+
},
|
| 104683 |
+
{
|
| 104684 |
+
"epoch": 17.031908831908833,
|
| 104685 |
+
"grad_norm": 0.18270917236804962,
|
| 104686 |
+
"learning_rate": 2.893151023484081e-06,
|
| 104687 |
+
"loss": 0.556,
|
| 104688 |
+
"step": 14954
|
| 104689 |
+
},
|
| 104690 |
+
{
|
| 104691 |
+
"epoch": 17.03304843304843,
|
| 104692 |
+
"grad_norm": 0.18606983125209808,
|
| 104693 |
+
"learning_rate": 2.8909757166612925e-06,
|
| 104694 |
+
"loss": 0.7677,
|
| 104695 |
+
"step": 14955
|
| 104696 |
+
},
|
| 104697 |
+
{
|
| 104698 |
+
"epoch": 17.034188034188034,
|
| 104699 |
+
"grad_norm": 0.2043803483247757,
|
| 104700 |
+
"learning_rate": 2.8888011777457173e-06,
|
| 104701 |
+
"loss": 0.6551,
|
| 104702 |
+
"step": 14956
|
| 104703 |
+
},
|
| 104704 |
+
{
|
| 104705 |
+
"epoch": 17.035327635327636,
|
| 104706 |
+
"grad_norm": 0.1733582764863968,
|
| 104707 |
+
"learning_rate": 2.886627406812889e-06,
|
| 104708 |
+
"loss": 0.7667,
|
| 104709 |
+
"step": 14957
|
| 104710 |
+
},
|
| 104711 |
+
{
|
| 104712 |
+
"epoch": 17.036467236467235,
|
| 104713 |
+
"grad_norm": 0.18059155344963074,
|
| 104714 |
+
"learning_rate": 2.8844544039382943e-06,
|
| 104715 |
+
"loss": 0.8046,
|
| 104716 |
+
"step": 14958
|
| 104717 |
+
},
|
| 104718 |
+
{
|
| 104719 |
+
"epoch": 17.037606837606837,
|
| 104720 |
+
"grad_norm": 0.16338680684566498,
|
| 104721 |
+
"learning_rate": 2.88228216919742e-06,
|
| 104722 |
+
"loss": 0.7791,
|
| 104723 |
+
"step": 14959
|
| 104724 |
+
},
|
| 104725 |
+
{
|
| 104726 |
+
"epoch": 17.03874643874644,
|
| 104727 |
+
"grad_norm": 0.17464567720890045,
|
| 104728 |
+
"learning_rate": 2.880110702665709e-06,
|
| 104729 |
+
"loss": 0.6268,
|
| 104730 |
+
"step": 14960
|
| 104731 |
+
},
|
| 104732 |
+
{
|
| 104733 |
+
"epoch": 17.03988603988604,
|
| 104734 |
+
"grad_norm": 0.2310246229171753,
|
| 104735 |
+
"learning_rate": 2.8779400044185807e-06,
|
| 104736 |
+
"loss": 0.5787,
|
| 104737 |
+
"step": 14961
|
| 104738 |
+
},
|
| 104739 |
+
{
|
| 104740 |
+
"epoch": 17.04102564102564,
|
| 104741 |
+
"grad_norm": 0.18465375900268555,
|
| 104742 |
+
"learning_rate": 2.8757700745314337e-06,
|
| 104743 |
+
"loss": 0.8619,
|
| 104744 |
+
"step": 14962
|
| 104745 |
+
},
|
| 104746 |
+
{
|
| 104747 |
+
"epoch": 17.042165242165243,
|
| 104748 |
+
"grad_norm": 0.17990891635417938,
|
| 104749 |
+
"learning_rate": 2.873600913079635e-06,
|
| 104750 |
+
"loss": 0.7925,
|
| 104751 |
+
"step": 14963
|
| 104752 |
+
},
|
| 104753 |
+
{
|
| 104754 |
+
"epoch": 17.043304843304842,
|
| 104755 |
+
"grad_norm": 0.2204209566116333,
|
| 104756 |
+
"learning_rate": 2.8714325201385284e-06,
|
| 104757 |
+
"loss": 0.6887,
|
| 104758 |
+
"step": 14964
|
| 104759 |
+
},
|
| 104760 |
+
{
|
| 104761 |
+
"epoch": 17.044444444444444,
|
| 104762 |
+
"grad_norm": 0.18174409866333008,
|
| 104763 |
+
"learning_rate": 2.869264895783419e-06,
|
| 104764 |
+
"loss": 0.4737,
|
| 104765 |
+
"step": 14965
|
| 104766 |
+
},
|
| 104767 |
+
{
|
| 104768 |
+
"epoch": 17.045584045584047,
|
| 104769 |
+
"grad_norm": 0.19974087178707123,
|
| 104770 |
+
"learning_rate": 2.8670980400895997e-06,
|
| 104771 |
+
"loss": 0.5616,
|
| 104772 |
+
"step": 14966
|
| 104773 |
+
},
|
| 104774 |
+
{
|
| 104775 |
+
"epoch": 17.046723646723645,
|
| 104776 |
+
"grad_norm": 0.20438915491104126,
|
| 104777 |
+
"learning_rate": 2.8649319531323324e-06,
|
| 104778 |
+
"loss": 0.4612,
|
| 104779 |
+
"step": 14967
|
| 104780 |
+
},
|
| 104781 |
+
{
|
| 104782 |
+
"epoch": 17.047863247863248,
|
| 104783 |
+
"grad_norm": 0.17648115754127502,
|
| 104784 |
+
"learning_rate": 2.862766634986855e-06,
|
| 104785 |
+
"loss": 0.8006,
|
| 104786 |
+
"step": 14968
|
| 104787 |
+
},
|
| 104788 |
+
{
|
| 104789 |
+
"epoch": 17.04900284900285,
|
| 104790 |
+
"grad_norm": 0.20189687609672546,
|
| 104791 |
+
"learning_rate": 2.8606020857283615e-06,
|
| 104792 |
+
"loss": 0.6507,
|
| 104793 |
+
"step": 14969
|
| 104794 |
+
},
|
| 104795 |
+
{
|
| 104796 |
+
"epoch": 17.05014245014245,
|
| 104797 |
+
"grad_norm": 0.3259961009025574,
|
| 104798 |
+
"learning_rate": 2.8584383054320456e-06,
|
| 104799 |
+
"loss": 0.487,
|
| 104800 |
+
"step": 14970
|
| 104801 |
+
},
|
| 104802 |
+
{
|
| 104803 |
+
"epoch": 17.05128205128205,
|
| 104804 |
+
"grad_norm": 0.24771519005298615,
|
| 104805 |
+
"learning_rate": 2.8562752941730552e-06,
|
| 104806 |
+
"loss": 0.4163,
|
| 104807 |
+
"step": 14971
|
| 104808 |
+
},
|
| 104809 |
+
{
|
| 104810 |
+
"epoch": 17.052421652421653,
|
| 104811 |
+
"grad_norm": 0.19273053109645844,
|
| 104812 |
+
"learning_rate": 2.854113052026519e-06,
|
| 104813 |
+
"loss": 0.6057,
|
| 104814 |
+
"step": 14972
|
| 104815 |
+
},
|
| 104816 |
+
{
|
| 104817 |
+
"epoch": 17.053561253561252,
|
| 104818 |
+
"grad_norm": 0.18914632499217987,
|
| 104819 |
+
"learning_rate": 2.8519515790675354e-06,
|
| 104820 |
+
"loss": 0.601,
|
| 104821 |
+
"step": 14973
|
| 104822 |
+
},
|
| 104823 |
+
{
|
| 104824 |
+
"epoch": 17.054700854700855,
|
| 104825 |
+
"grad_norm": 0.1870405226945877,
|
| 104826 |
+
"learning_rate": 2.849790875371186e-06,
|
| 104827 |
+
"loss": 0.6059,
|
| 104828 |
+
"step": 14974
|
| 104829 |
+
},
|
| 104830 |
+
{
|
| 104831 |
+
"epoch": 17.055840455840457,
|
| 104832 |
+
"grad_norm": 0.19459015130996704,
|
| 104833 |
+
"learning_rate": 2.8476309410125136e-06,
|
| 104834 |
+
"loss": 0.5807,
|
| 104835 |
+
"step": 14975
|
| 104836 |
+
},
|
| 104837 |
+
{
|
| 104838 |
+
"epoch": 17.056980056980056,
|
| 104839 |
+
"grad_norm": 0.217820942401886,
|
| 104840 |
+
"learning_rate": 2.8454717760665355e-06,
|
| 104841 |
+
"loss": 0.5976,
|
| 104842 |
+
"step": 14976
|
| 104843 |
+
},
|
| 104844 |
+
{
|
| 104845 |
+
"epoch": 17.058119658119658,
|
| 104846 |
+
"grad_norm": 0.17932353913784027,
|
| 104847 |
+
"learning_rate": 2.8433133806082452e-06,
|
| 104848 |
+
"loss": 0.6844,
|
| 104849 |
+
"step": 14977
|
| 104850 |
+
},
|
| 104851 |
+
{
|
| 104852 |
+
"epoch": 17.05925925925926,
|
| 104853 |
+
"grad_norm": 0.2388077676296234,
|
| 104854 |
+
"learning_rate": 2.8411557547126154e-06,
|
| 104855 |
+
"loss": 0.6194,
|
| 104856 |
+
"step": 14978
|
| 104857 |
+
},
|
| 104858 |
+
{
|
| 104859 |
+
"epoch": 17.06039886039886,
|
| 104860 |
+
"grad_norm": 0.2089555561542511,
|
| 104861 |
+
"learning_rate": 2.8389988984545868e-06,
|
| 104862 |
+
"loss": 0.7011,
|
| 104863 |
+
"step": 14979
|
| 104864 |
+
},
|
| 104865 |
+
{
|
| 104866 |
+
"epoch": 17.06153846153846,
|
| 104867 |
+
"grad_norm": 0.22288168966770172,
|
| 104868 |
+
"learning_rate": 2.836842811909063e-06,
|
| 104869 |
+
"loss": 0.7498,
|
| 104870 |
+
"step": 14980
|
| 104871 |
+
},
|
| 104872 |
+
{
|
| 104873 |
+
"epoch": 17.062678062678064,
|
| 104874 |
+
"grad_norm": 0.2411726415157318,
|
| 104875 |
+
"learning_rate": 2.8346874951509394e-06,
|
| 104876 |
+
"loss": 0.7155,
|
| 104877 |
+
"step": 14981
|
| 104878 |
+
},
|
| 104879 |
+
{
|
| 104880 |
+
"epoch": 17.063817663817662,
|
| 104881 |
+
"grad_norm": 0.18449333310127258,
|
| 104882 |
+
"learning_rate": 2.832532948255068e-06,
|
| 104883 |
+
"loss": 0.6725,
|
| 104884 |
+
"step": 14982
|
| 104885 |
+
},
|
| 104886 |
+
{
|
| 104887 |
+
"epoch": 17.064957264957265,
|
| 104888 |
+
"grad_norm": 0.17795316874980927,
|
| 104889 |
+
"learning_rate": 2.830379171296299e-06,
|
| 104890 |
+
"loss": 0.7807,
|
| 104891 |
+
"step": 14983
|
| 104892 |
+
},
|
| 104893 |
+
{
|
| 104894 |
+
"epoch": 17.066096866096867,
|
| 104895 |
+
"grad_norm": 0.16813020408153534,
|
| 104896 |
+
"learning_rate": 2.8282261643494205e-06,
|
| 104897 |
+
"loss": 0.662,
|
| 104898 |
+
"step": 14984
|
| 104899 |
+
},
|
| 104900 |
+
{
|
| 104901 |
+
"epoch": 17.067236467236466,
|
| 104902 |
+
"grad_norm": 0.20966660976409912,
|
| 104903 |
+
"learning_rate": 2.826073927489223e-06,
|
| 104904 |
+
"loss": 0.4603,
|
| 104905 |
+
"step": 14985
|
| 104906 |
+
},
|
| 104907 |
+
{
|
| 104908 |
+
"epoch": 17.068376068376068,
|
| 104909 |
+
"grad_norm": 0.16195650398731232,
|
| 104910 |
+
"learning_rate": 2.823922460790454e-06,
|
| 104911 |
+
"loss": 0.731,
|
| 104912 |
+
"step": 14986
|
| 104913 |
+
},
|
| 104914 |
+
{
|
| 104915 |
+
"epoch": 17.06951566951567,
|
| 104916 |
+
"grad_norm": 0.20361843705177307,
|
| 104917 |
+
"learning_rate": 2.821771764327849e-06,
|
| 104918 |
+
"loss": 0.4872,
|
| 104919 |
+
"step": 14987
|
| 104920 |
+
},
|
| 104921 |
+
{
|
| 104922 |
+
"epoch": 17.07065527065527,
|
| 104923 |
+
"grad_norm": 0.22102849185466766,
|
| 104924 |
+
"learning_rate": 2.819621838176095e-06,
|
| 104925 |
+
"loss": 0.6218,
|
| 104926 |
+
"step": 14988
|
| 104927 |
+
},
|
| 104928 |
+
{
|
| 104929 |
+
"epoch": 17.07179487179487,
|
| 104930 |
+
"grad_norm": 0.17357826232910156,
|
| 104931 |
+
"learning_rate": 2.817472682409872e-06,
|
| 104932 |
+
"loss": 0.6965,
|
| 104933 |
+
"step": 14989
|
| 104934 |
+
},
|
| 104935 |
+
{
|
| 104936 |
+
"epoch": 17.072934472934474,
|
| 104937 |
+
"grad_norm": 0.18162836134433746,
|
| 104938 |
+
"learning_rate": 2.8153242971038245e-06,
|
| 104939 |
+
"loss": 0.77,
|
| 104940 |
+
"step": 14990
|
| 104941 |
+
},
|
| 104942 |
+
{
|
| 104943 |
+
"epoch": 17.074074074074073,
|
| 104944 |
+
"grad_norm": 0.22448180615901947,
|
| 104945 |
+
"learning_rate": 2.8131766823325772e-06,
|
| 104946 |
+
"loss": 0.6818,
|
| 104947 |
+
"step": 14991
|
| 104948 |
+
},
|
| 104949 |
+
{
|
| 104950 |
+
"epoch": 17.075213675213675,
|
| 104951 |
+
"grad_norm": 0.19140951335430145,
|
| 104952 |
+
"learning_rate": 2.8110298381707147e-06,
|
| 104953 |
+
"loss": 0.6996,
|
| 104954 |
+
"step": 14992
|
| 104955 |
+
},
|
| 104956 |
+
{
|
| 104957 |
+
"epoch": 17.076353276353277,
|
| 104958 |
+
"grad_norm": 0.1942320168018341,
|
| 104959 |
+
"learning_rate": 2.808883764692799e-06,
|
| 104960 |
+
"loss": 0.5972,
|
| 104961 |
+
"step": 14993
|
| 104962 |
+
},
|
| 104963 |
+
{
|
| 104964 |
+
"epoch": 17.077492877492876,
|
| 104965 |
+
"grad_norm": 0.17436128854751587,
|
| 104966 |
+
"learning_rate": 2.806738461973385e-06,
|
| 104967 |
+
"loss": 0.7579,
|
| 104968 |
+
"step": 14994
|
| 104969 |
+
},
|
| 104970 |
+
{
|
| 104971 |
+
"epoch": 17.07863247863248,
|
| 104972 |
+
"grad_norm": 0.1683426797389984,
|
| 104973 |
+
"learning_rate": 2.8045939300869712e-06,
|
| 104974 |
+
"loss": 0.655,
|
| 104975 |
+
"step": 14995
|
| 104976 |
+
},
|
| 104977 |
+
{
|
| 104978 |
+
"epoch": 17.07977207977208,
|
| 104979 |
+
"grad_norm": 0.19002686440944672,
|
| 104980 |
+
"learning_rate": 2.8024501691080478e-06,
|
| 104981 |
+
"loss": 0.5977,
|
| 104982 |
+
"step": 14996
|
| 104983 |
+
},
|
| 104984 |
+
{
|
| 104985 |
+
"epoch": 17.08091168091168,
|
| 104986 |
+
"grad_norm": 0.195677250623703,
|
| 104987 |
+
"learning_rate": 2.8003071791110752e-06,
|
| 104988 |
+
"loss": 0.5108,
|
| 104989 |
+
"step": 14997
|
| 104990 |
+
},
|
| 104991 |
+
{
|
| 104992 |
+
"epoch": 17.08205128205128,
|
| 104993 |
+
"grad_norm": 0.17492468655109406,
|
| 104994 |
+
"learning_rate": 2.798164960170488e-06,
|
| 104995 |
+
"loss": 0.6474,
|
| 104996 |
+
"step": 14998
|
| 104997 |
+
},
|
| 104998 |
+
{
|
| 104999 |
+
"epoch": 17.083190883190884,
|
| 105000 |
+
"grad_norm": 0.24908673763275146,
|
| 105001 |
+
"learning_rate": 2.796023512360679e-06,
|
| 105002 |
+
"loss": 0.5377,
|
| 105003 |
+
"step": 14999
|
| 105004 |
+
},
|
| 105005 |
+
{
|
| 105006 |
+
"epoch": 17.084330484330483,
|
| 105007 |
+
"grad_norm": 0.18173104524612427,
|
| 105008 |
+
"learning_rate": 2.7938828357560397e-06,
|
| 105009 |
+
"loss": 0.5363,
|
| 105010 |
+
"step": 15000
|
| 105011 |
}
|
| 105012 |
],
|
| 105013 |
"logging_steps": 1,
|
|
|
|
| 105027 |
"attributes": {}
|
| 105028 |
}
|
| 105029 |
},
|
| 105030 |
+
"total_flos": 8.386647104829948e+19,
|
| 105031 |
"train_batch_size": 8,
|
| 105032 |
"trial_name": null,
|
| 105033 |
"trial_params": null
|