Training in progress, step 14400, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000555808
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c541e17689fc0ffaa9fa38bba4d4925ce8a80e9d79f0bdc35458edd0540799f
|
| 3 |
size 1000555808
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1318473087
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d599eb0373a6cd6d70f2d7c1523616db452aac20d6ee53107c599b58771104f
|
| 3 |
size 1318473087
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e301d3b5590ff63050598c546773b4cecd7962655131ebde60a7c5281eb1728
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -98708,6 +98708,2106 @@
|
|
| 98708 |
"learning_rate": 9.271395237051277e-05,
|
| 98709 |
"loss": 2.3777,
|
| 98710 |
"step": 14100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98711 |
}
|
| 98712 |
],
|
| 98713 |
"logging_steps": 1,
|
|
@@ -98727,7 +100827,7 @@
|
|
| 98727 |
"attributes": {}
|
| 98728 |
}
|
| 98729 |
},
|
| 98730 |
-
"total_flos": 7.
|
| 98731 |
"train_batch_size": 8,
|
| 98732 |
"trial_name": null,
|
| 98733 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.8,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 14400,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 98708 |
"learning_rate": 9.271395237051277e-05,
|
| 98709 |
"loss": 2.3777,
|
| 98710 |
"step": 14100
|
| 98711 |
+
},
|
| 98712 |
+
{
|
| 98713 |
+
"epoch": 0.7833888888888889,
|
| 98714 |
+
"grad_norm": 0.099609375,
|
| 98715 |
+
"learning_rate": 9.266835130674895e-05,
|
| 98716 |
+
"loss": 2.373,
|
| 98717 |
+
"step": 14101
|
| 98718 |
+
},
|
| 98719 |
+
{
|
| 98720 |
+
"epoch": 0.7834444444444445,
|
| 98721 |
+
"grad_norm": 0.1015625,
|
| 98722 |
+
"learning_rate": 9.262275999085819e-05,
|
| 98723 |
+
"loss": 2.5327,
|
| 98724 |
+
"step": 14102
|
| 98725 |
+
},
|
| 98726 |
+
{
|
| 98727 |
+
"epoch": 0.7835,
|
| 98728 |
+
"grad_norm": 0.099609375,
|
| 98729 |
+
"learning_rate": 9.25771784242866e-05,
|
| 98730 |
+
"loss": 2.3124,
|
| 98731 |
+
"step": 14103
|
| 98732 |
+
},
|
| 98733 |
+
{
|
| 98734 |
+
"epoch": 0.7835555555555556,
|
| 98735 |
+
"grad_norm": 0.1025390625,
|
| 98736 |
+
"learning_rate": 9.253160660848012e-05,
|
| 98737 |
+
"loss": 2.5139,
|
| 98738 |
+
"step": 14104
|
| 98739 |
+
},
|
| 98740 |
+
{
|
| 98741 |
+
"epoch": 0.7836111111111111,
|
| 98742 |
+
"grad_norm": 0.099609375,
|
| 98743 |
+
"learning_rate": 9.248604454488385e-05,
|
| 98744 |
+
"loss": 2.3436,
|
| 98745 |
+
"step": 14105
|
| 98746 |
+
},
|
| 98747 |
+
{
|
| 98748 |
+
"epoch": 0.7836666666666666,
|
| 98749 |
+
"grad_norm": 0.1005859375,
|
| 98750 |
+
"learning_rate": 9.244049223494306e-05,
|
| 98751 |
+
"loss": 2.4217,
|
| 98752 |
+
"step": 14106
|
| 98753 |
+
},
|
| 98754 |
+
{
|
| 98755 |
+
"epoch": 0.7837222222222222,
|
| 98756 |
+
"grad_norm": 0.10107421875,
|
| 98757 |
+
"learning_rate": 9.239494968010269e-05,
|
| 98758 |
+
"loss": 2.444,
|
| 98759 |
+
"step": 14107
|
| 98760 |
+
},
|
| 98761 |
+
{
|
| 98762 |
+
"epoch": 0.7837777777777778,
|
| 98763 |
+
"grad_norm": 0.10009765625,
|
| 98764 |
+
"learning_rate": 9.234941688180714e-05,
|
| 98765 |
+
"loss": 2.3437,
|
| 98766 |
+
"step": 14108
|
| 98767 |
+
},
|
| 98768 |
+
{
|
| 98769 |
+
"epoch": 0.7838333333333334,
|
| 98770 |
+
"grad_norm": 0.099609375,
|
| 98771 |
+
"learning_rate": 9.230389384150053e-05,
|
| 98772 |
+
"loss": 2.4231,
|
| 98773 |
+
"step": 14109
|
| 98774 |
+
},
|
| 98775 |
+
{
|
| 98776 |
+
"epoch": 0.7838888888888889,
|
| 98777 |
+
"grad_norm": 0.10205078125,
|
| 98778 |
+
"learning_rate": 9.225838056062693e-05,
|
| 98779 |
+
"loss": 2.5072,
|
| 98780 |
+
"step": 14110
|
| 98781 |
+
},
|
| 98782 |
+
{
|
| 98783 |
+
"epoch": 0.7839444444444444,
|
| 98784 |
+
"grad_norm": 0.1005859375,
|
| 98785 |
+
"learning_rate": 9.221287704062978e-05,
|
| 98786 |
+
"loss": 2.341,
|
| 98787 |
+
"step": 14111
|
| 98788 |
+
},
|
| 98789 |
+
{
|
| 98790 |
+
"epoch": 0.784,
|
| 98791 |
+
"grad_norm": 0.10107421875,
|
| 98792 |
+
"learning_rate": 9.216738328295233e-05,
|
| 98793 |
+
"loss": 2.4755,
|
| 98794 |
+
"step": 14112
|
| 98795 |
+
},
|
| 98796 |
+
{
|
| 98797 |
+
"epoch": 0.7840555555555555,
|
| 98798 |
+
"grad_norm": 0.1005859375,
|
| 98799 |
+
"learning_rate": 9.212189928903759e-05,
|
| 98800 |
+
"loss": 2.3922,
|
| 98801 |
+
"step": 14113
|
| 98802 |
+
},
|
| 98803 |
+
{
|
| 98804 |
+
"epoch": 0.7841111111111111,
|
| 98805 |
+
"grad_norm": 0.1005859375,
|
| 98806 |
+
"learning_rate": 9.207642506032832e-05,
|
| 98807 |
+
"loss": 2.4079,
|
| 98808 |
+
"step": 14114
|
| 98809 |
+
},
|
| 98810 |
+
{
|
| 98811 |
+
"epoch": 0.7841666666666667,
|
| 98812 |
+
"grad_norm": 0.10205078125,
|
| 98813 |
+
"learning_rate": 9.203096059826677e-05,
|
| 98814 |
+
"loss": 2.4436,
|
| 98815 |
+
"step": 14115
|
| 98816 |
+
},
|
| 98817 |
+
{
|
| 98818 |
+
"epoch": 0.7842222222222223,
|
| 98819 |
+
"grad_norm": 0.1015625,
|
| 98820 |
+
"learning_rate": 9.198550590429489e-05,
|
| 98821 |
+
"loss": 2.3543,
|
| 98822 |
+
"step": 14116
|
| 98823 |
+
},
|
| 98824 |
+
{
|
| 98825 |
+
"epoch": 0.7842777777777777,
|
| 98826 |
+
"grad_norm": 0.10400390625,
|
| 98827 |
+
"learning_rate": 9.194006097985455e-05,
|
| 98828 |
+
"loss": 2.5394,
|
| 98829 |
+
"step": 14117
|
| 98830 |
+
},
|
| 98831 |
+
{
|
| 98832 |
+
"epoch": 0.7843333333333333,
|
| 98833 |
+
"grad_norm": 0.10107421875,
|
| 98834 |
+
"learning_rate": 9.189462582638708e-05,
|
| 98835 |
+
"loss": 2.3177,
|
| 98836 |
+
"step": 14118
|
| 98837 |
+
},
|
| 98838 |
+
{
|
| 98839 |
+
"epoch": 0.7843888888888889,
|
| 98840 |
+
"grad_norm": 0.10107421875,
|
| 98841 |
+
"learning_rate": 9.184920044533348e-05,
|
| 98842 |
+
"loss": 2.383,
|
| 98843 |
+
"step": 14119
|
| 98844 |
+
},
|
| 98845 |
+
{
|
| 98846 |
+
"epoch": 0.7844444444444445,
|
| 98847 |
+
"grad_norm": 0.10107421875,
|
| 98848 |
+
"learning_rate": 9.180378483813479e-05,
|
| 98849 |
+
"loss": 2.4585,
|
| 98850 |
+
"step": 14120
|
| 98851 |
+
},
|
| 98852 |
+
{
|
| 98853 |
+
"epoch": 0.7845,
|
| 98854 |
+
"grad_norm": 0.10009765625,
|
| 98855 |
+
"learning_rate": 9.175837900623121e-05,
|
| 98856 |
+
"loss": 2.3247,
|
| 98857 |
+
"step": 14121
|
| 98858 |
+
},
|
| 98859 |
+
{
|
| 98860 |
+
"epoch": 0.7845555555555556,
|
| 98861 |
+
"grad_norm": 0.10107421875,
|
| 98862 |
+
"learning_rate": 9.171298295106319e-05,
|
| 98863 |
+
"loss": 2.4003,
|
| 98864 |
+
"step": 14122
|
| 98865 |
+
},
|
| 98866 |
+
{
|
| 98867 |
+
"epoch": 0.7846111111111111,
|
| 98868 |
+
"grad_norm": 0.1025390625,
|
| 98869 |
+
"learning_rate": 9.166759667407032e-05,
|
| 98870 |
+
"loss": 2.5369,
|
| 98871 |
+
"step": 14123
|
| 98872 |
+
},
|
| 98873 |
+
{
|
| 98874 |
+
"epoch": 0.7846666666666666,
|
| 98875 |
+
"grad_norm": 0.1005859375,
|
| 98876 |
+
"learning_rate": 9.162222017669244e-05,
|
| 98877 |
+
"loss": 2.3279,
|
| 98878 |
+
"step": 14124
|
| 98879 |
+
},
|
| 98880 |
+
{
|
| 98881 |
+
"epoch": 0.7847222222222222,
|
| 98882 |
+
"grad_norm": 0.1015625,
|
| 98883 |
+
"learning_rate": 9.157685346036857e-05,
|
| 98884 |
+
"loss": 2.4454,
|
| 98885 |
+
"step": 14125
|
| 98886 |
+
},
|
| 98887 |
+
{
|
| 98888 |
+
"epoch": 0.7847777777777778,
|
| 98889 |
+
"grad_norm": 0.1005859375,
|
| 98890 |
+
"learning_rate": 9.153149652653765e-05,
|
| 98891 |
+
"loss": 2.3905,
|
| 98892 |
+
"step": 14126
|
| 98893 |
+
},
|
| 98894 |
+
{
|
| 98895 |
+
"epoch": 0.7848333333333334,
|
| 98896 |
+
"grad_norm": 0.1015625,
|
| 98897 |
+
"learning_rate": 9.148614937663844e-05,
|
| 98898 |
+
"loss": 2.4005,
|
| 98899 |
+
"step": 14127
|
| 98900 |
+
},
|
| 98901 |
+
{
|
| 98902 |
+
"epoch": 0.7848888888888889,
|
| 98903 |
+
"grad_norm": 0.10205078125,
|
| 98904 |
+
"learning_rate": 9.144081201210907e-05,
|
| 98905 |
+
"loss": 2.476,
|
| 98906 |
+
"step": 14128
|
| 98907 |
+
},
|
| 98908 |
+
{
|
| 98909 |
+
"epoch": 0.7849444444444444,
|
| 98910 |
+
"grad_norm": 0.1005859375,
|
| 98911 |
+
"learning_rate": 9.139548443438779e-05,
|
| 98912 |
+
"loss": 2.4382,
|
| 98913 |
+
"step": 14129
|
| 98914 |
+
},
|
| 98915 |
+
{
|
| 98916 |
+
"epoch": 0.785,
|
| 98917 |
+
"grad_norm": 0.10009765625,
|
| 98918 |
+
"learning_rate": 9.13501666449121e-05,
|
| 98919 |
+
"loss": 2.3611,
|
| 98920 |
+
"step": 14130
|
| 98921 |
+
},
|
| 98922 |
+
{
|
| 98923 |
+
"epoch": 0.7850555555555555,
|
| 98924 |
+
"grad_norm": 0.10009765625,
|
| 98925 |
+
"learning_rate": 9.130485864511929e-05,
|
| 98926 |
+
"loss": 2.4806,
|
| 98927 |
+
"step": 14131
|
| 98928 |
+
},
|
| 98929 |
+
{
|
| 98930 |
+
"epoch": 0.7851111111111111,
|
| 98931 |
+
"grad_norm": 0.1025390625,
|
| 98932 |
+
"learning_rate": 9.12595604364467e-05,
|
| 98933 |
+
"loss": 2.4959,
|
| 98934 |
+
"step": 14132
|
| 98935 |
+
},
|
| 98936 |
+
{
|
| 98937 |
+
"epoch": 0.7851666666666667,
|
| 98938 |
+
"grad_norm": 0.1005859375,
|
| 98939 |
+
"learning_rate": 9.121427202033083e-05,
|
| 98940 |
+
"loss": 2.3604,
|
| 98941 |
+
"step": 14133
|
| 98942 |
+
},
|
| 98943 |
+
{
|
| 98944 |
+
"epoch": 0.7852222222222223,
|
| 98945 |
+
"grad_norm": 0.10009765625,
|
| 98946 |
+
"learning_rate": 9.116899339820833e-05,
|
| 98947 |
+
"loss": 2.2994,
|
| 98948 |
+
"step": 14134
|
| 98949 |
+
},
|
| 98950 |
+
{
|
| 98951 |
+
"epoch": 0.7852777777777777,
|
| 98952 |
+
"grad_norm": 0.1025390625,
|
| 98953 |
+
"learning_rate": 9.112372457151516e-05,
|
| 98954 |
+
"loss": 2.5439,
|
| 98955 |
+
"step": 14135
|
| 98956 |
+
},
|
| 98957 |
+
{
|
| 98958 |
+
"epoch": 0.7853333333333333,
|
| 98959 |
+
"grad_norm": 0.09765625,
|
| 98960 |
+
"learning_rate": 9.107846554168732e-05,
|
| 98961 |
+
"loss": 2.3084,
|
| 98962 |
+
"step": 14136
|
| 98963 |
+
},
|
| 98964 |
+
{
|
| 98965 |
+
"epoch": 0.7853888888888889,
|
| 98966 |
+
"grad_norm": 0.099609375,
|
| 98967 |
+
"learning_rate": 9.103321631016023e-05,
|
| 98968 |
+
"loss": 2.3689,
|
| 98969 |
+
"step": 14137
|
| 98970 |
+
},
|
| 98971 |
+
{
|
| 98972 |
+
"epoch": 0.7854444444444444,
|
| 98973 |
+
"grad_norm": 0.10107421875,
|
| 98974 |
+
"learning_rate": 9.098797687836906e-05,
|
| 98975 |
+
"loss": 2.5072,
|
| 98976 |
+
"step": 14138
|
| 98977 |
+
},
|
| 98978 |
+
{
|
| 98979 |
+
"epoch": 0.7855,
|
| 98980 |
+
"grad_norm": 0.10107421875,
|
| 98981 |
+
"learning_rate": 9.094274724774878e-05,
|
| 98982 |
+
"loss": 2.3568,
|
| 98983 |
+
"step": 14139
|
| 98984 |
+
},
|
| 98985 |
+
{
|
| 98986 |
+
"epoch": 0.7855555555555556,
|
| 98987 |
+
"grad_norm": 0.099609375,
|
| 98988 |
+
"learning_rate": 9.089752741973398e-05,
|
| 98989 |
+
"loss": 2.2738,
|
| 98990 |
+
"step": 14140
|
| 98991 |
+
},
|
| 98992 |
+
{
|
| 98993 |
+
"epoch": 0.7856111111111111,
|
| 98994 |
+
"grad_norm": 0.1005859375,
|
| 98995 |
+
"learning_rate": 9.085231739575881e-05,
|
| 98996 |
+
"loss": 2.4688,
|
| 98997 |
+
"step": 14141
|
| 98998 |
+
},
|
| 98999 |
+
{
|
| 99000 |
+
"epoch": 0.7856666666666666,
|
| 99001 |
+
"grad_norm": 0.09912109375,
|
| 99002 |
+
"learning_rate": 9.080711717725728e-05,
|
| 99003 |
+
"loss": 2.3468,
|
| 99004 |
+
"step": 14142
|
| 99005 |
+
},
|
| 99006 |
+
{
|
| 99007 |
+
"epoch": 0.7857222222222222,
|
| 99008 |
+
"grad_norm": 0.1005859375,
|
| 99009 |
+
"learning_rate": 9.07619267656632e-05,
|
| 99010 |
+
"loss": 2.3643,
|
| 99011 |
+
"step": 14143
|
| 99012 |
+
},
|
| 99013 |
+
{
|
| 99014 |
+
"epoch": 0.7857777777777778,
|
| 99015 |
+
"grad_norm": 0.10107421875,
|
| 99016 |
+
"learning_rate": 9.071674616240975e-05,
|
| 99017 |
+
"loss": 2.49,
|
| 99018 |
+
"step": 14144
|
| 99019 |
+
},
|
| 99020 |
+
{
|
| 99021 |
+
"epoch": 0.7858333333333334,
|
| 99022 |
+
"grad_norm": 0.1015625,
|
| 99023 |
+
"learning_rate": 9.067157536892987e-05,
|
| 99024 |
+
"loss": 2.431,
|
| 99025 |
+
"step": 14145
|
| 99026 |
+
},
|
| 99027 |
+
{
|
| 99028 |
+
"epoch": 0.7858888888888889,
|
| 99029 |
+
"grad_norm": 0.1005859375,
|
| 99030 |
+
"learning_rate": 9.062641438665655e-05,
|
| 99031 |
+
"loss": 2.4235,
|
| 99032 |
+
"step": 14146
|
| 99033 |
+
},
|
| 99034 |
+
{
|
| 99035 |
+
"epoch": 0.7859444444444444,
|
| 99036 |
+
"grad_norm": 0.1005859375,
|
| 99037 |
+
"learning_rate": 9.058126321702202e-05,
|
| 99038 |
+
"loss": 2.4839,
|
| 99039 |
+
"step": 14147
|
| 99040 |
+
},
|
| 99041 |
+
{
|
| 99042 |
+
"epoch": 0.786,
|
| 99043 |
+
"grad_norm": 0.09912109375,
|
| 99044 |
+
"learning_rate": 9.05361218614583e-05,
|
| 99045 |
+
"loss": 2.355,
|
| 99046 |
+
"step": 14148
|
| 99047 |
+
},
|
| 99048 |
+
{
|
| 99049 |
+
"epoch": 0.7860555555555555,
|
| 99050 |
+
"grad_norm": 0.099609375,
|
| 99051 |
+
"learning_rate": 9.049099032139726e-05,
|
| 99052 |
+
"loss": 2.3807,
|
| 99053 |
+
"step": 14149
|
| 99054 |
+
},
|
| 99055 |
+
{
|
| 99056 |
+
"epoch": 0.7861111111111111,
|
| 99057 |
+
"grad_norm": 0.1015625,
|
| 99058 |
+
"learning_rate": 9.044586859827063e-05,
|
| 99059 |
+
"loss": 2.4973,
|
| 99060 |
+
"step": 14150
|
| 99061 |
+
},
|
| 99062 |
+
{
|
| 99063 |
+
"epoch": 0.7861666666666667,
|
| 99064 |
+
"grad_norm": 0.1005859375,
|
| 99065 |
+
"learning_rate": 9.040075669350904e-05,
|
| 99066 |
+
"loss": 2.3524,
|
| 99067 |
+
"step": 14151
|
| 99068 |
+
},
|
| 99069 |
+
{
|
| 99070 |
+
"epoch": 0.7862222222222223,
|
| 99071 |
+
"grad_norm": 0.1005859375,
|
| 99072 |
+
"learning_rate": 9.035565460854365e-05,
|
| 99073 |
+
"loss": 2.3892,
|
| 99074 |
+
"step": 14152
|
| 99075 |
+
},
|
| 99076 |
+
{
|
| 99077 |
+
"epoch": 0.7862777777777777,
|
| 99078 |
+
"grad_norm": 0.10205078125,
|
| 99079 |
+
"learning_rate": 9.03105623448051e-05,
|
| 99080 |
+
"loss": 2.3891,
|
| 99081 |
+
"step": 14153
|
| 99082 |
+
},
|
| 99083 |
+
{
|
| 99084 |
+
"epoch": 0.7863333333333333,
|
| 99085 |
+
"grad_norm": 0.10009765625,
|
| 99086 |
+
"learning_rate": 9.026547990372347e-05,
|
| 99087 |
+
"loss": 2.3619,
|
| 99088 |
+
"step": 14154
|
| 99089 |
+
},
|
| 99090 |
+
{
|
| 99091 |
+
"epoch": 0.7863888888888889,
|
| 99092 |
+
"grad_norm": 0.1005859375,
|
| 99093 |
+
"learning_rate": 9.022040728672854e-05,
|
| 99094 |
+
"loss": 2.3879,
|
| 99095 |
+
"step": 14155
|
| 99096 |
+
},
|
| 99097 |
+
{
|
| 99098 |
+
"epoch": 0.7864444444444444,
|
| 99099 |
+
"grad_norm": 0.10009765625,
|
| 99100 |
+
"learning_rate": 9.017534449525022e-05,
|
| 99101 |
+
"loss": 2.4484,
|
| 99102 |
+
"step": 14156
|
| 99103 |
+
},
|
| 99104 |
+
{
|
| 99105 |
+
"epoch": 0.7865,
|
| 99106 |
+
"grad_norm": 0.09912109375,
|
| 99107 |
+
"learning_rate": 9.013029153071761e-05,
|
| 99108 |
+
"loss": 2.3506,
|
| 99109 |
+
"step": 14157
|
| 99110 |
+
},
|
| 99111 |
+
{
|
| 99112 |
+
"epoch": 0.7865555555555556,
|
| 99113 |
+
"grad_norm": 0.10107421875,
|
| 99114 |
+
"learning_rate": 9.008524839455966e-05,
|
| 99115 |
+
"loss": 2.3742,
|
| 99116 |
+
"step": 14158
|
| 99117 |
+
},
|
| 99118 |
+
{
|
| 99119 |
+
"epoch": 0.7866111111111111,
|
| 99120 |
+
"grad_norm": 0.10205078125,
|
| 99121 |
+
"learning_rate": 9.004021508820502e-05,
|
| 99122 |
+
"loss": 2.4965,
|
| 99123 |
+
"step": 14159
|
| 99124 |
+
},
|
| 99125 |
+
{
|
| 99126 |
+
"epoch": 0.7866666666666666,
|
| 99127 |
+
"grad_norm": 0.10009765625,
|
| 99128 |
+
"learning_rate": 8.999519161308238e-05,
|
| 99129 |
+
"loss": 2.3437,
|
| 99130 |
+
"step": 14160
|
| 99131 |
+
},
|
| 99132 |
+
{
|
| 99133 |
+
"epoch": 0.7867222222222222,
|
| 99134 |
+
"grad_norm": 0.10205078125,
|
| 99135 |
+
"learning_rate": 8.995017797061933e-05,
|
| 99136 |
+
"loss": 2.3523,
|
| 99137 |
+
"step": 14161
|
| 99138 |
+
},
|
| 99139 |
+
{
|
| 99140 |
+
"epoch": 0.7867777777777778,
|
| 99141 |
+
"grad_norm": 0.1015625,
|
| 99142 |
+
"learning_rate": 8.990517416224378e-05,
|
| 99143 |
+
"loss": 2.4765,
|
| 99144 |
+
"step": 14162
|
| 99145 |
+
},
|
| 99146 |
+
{
|
| 99147 |
+
"epoch": 0.7868333333333334,
|
| 99148 |
+
"grad_norm": 0.10107421875,
|
| 99149 |
+
"learning_rate": 8.98601801893833e-05,
|
| 99150 |
+
"loss": 2.4644,
|
| 99151 |
+
"step": 14163
|
| 99152 |
+
},
|
| 99153 |
+
{
|
| 99154 |
+
"epoch": 0.7868888888888889,
|
| 99155 |
+
"grad_norm": 0.1005859375,
|
| 99156 |
+
"learning_rate": 8.981519605346482e-05,
|
| 99157 |
+
"loss": 2.382,
|
| 99158 |
+
"step": 14164
|
| 99159 |
+
},
|
| 99160 |
+
{
|
| 99161 |
+
"epoch": 0.7869444444444444,
|
| 99162 |
+
"grad_norm": 0.10107421875,
|
| 99163 |
+
"learning_rate": 8.977022175591509e-05,
|
| 99164 |
+
"loss": 2.3857,
|
| 99165 |
+
"step": 14165
|
| 99166 |
+
},
|
| 99167 |
+
{
|
| 99168 |
+
"epoch": 0.787,
|
| 99169 |
+
"grad_norm": 0.1025390625,
|
| 99170 |
+
"learning_rate": 8.972525729816083e-05,
|
| 99171 |
+
"loss": 2.4829,
|
| 99172 |
+
"step": 14166
|
| 99173 |
+
},
|
| 99174 |
+
{
|
| 99175 |
+
"epoch": 0.7870555555555555,
|
| 99176 |
+
"grad_norm": 0.10009765625,
|
| 99177 |
+
"learning_rate": 8.968030268162806e-05,
|
| 99178 |
+
"loss": 2.3469,
|
| 99179 |
+
"step": 14167
|
| 99180 |
+
},
|
| 99181 |
+
{
|
| 99182 |
+
"epoch": 0.7871111111111111,
|
| 99183 |
+
"grad_norm": 0.09912109375,
|
| 99184 |
+
"learning_rate": 8.963535790774255e-05,
|
| 99185 |
+
"loss": 2.3302,
|
| 99186 |
+
"step": 14168
|
| 99187 |
+
},
|
| 99188 |
+
{
|
| 99189 |
+
"epoch": 0.7871666666666667,
|
| 99190 |
+
"grad_norm": 0.1005859375,
|
| 99191 |
+
"learning_rate": 8.959042297792994e-05,
|
| 99192 |
+
"loss": 2.432,
|
| 99193 |
+
"step": 14169
|
| 99194 |
+
},
|
| 99195 |
+
{
|
| 99196 |
+
"epoch": 0.7872222222222223,
|
| 99197 |
+
"grad_norm": 0.1015625,
|
| 99198 |
+
"learning_rate": 8.954549789361558e-05,
|
| 99199 |
+
"loss": 2.4612,
|
| 99200 |
+
"step": 14170
|
| 99201 |
+
},
|
| 99202 |
+
{
|
| 99203 |
+
"epoch": 0.7872777777777777,
|
| 99204 |
+
"grad_norm": 0.10107421875,
|
| 99205 |
+
"learning_rate": 8.950058265622425e-05,
|
| 99206 |
+
"loss": 2.3962,
|
| 99207 |
+
"step": 14171
|
| 99208 |
+
},
|
| 99209 |
+
{
|
| 99210 |
+
"epoch": 0.7873333333333333,
|
| 99211 |
+
"grad_norm": 0.099609375,
|
| 99212 |
+
"learning_rate": 8.945567726718053e-05,
|
| 99213 |
+
"loss": 2.3662,
|
| 99214 |
+
"step": 14172
|
| 99215 |
+
},
|
| 99216 |
+
{
|
| 99217 |
+
"epoch": 0.7873888888888889,
|
| 99218 |
+
"grad_norm": 0.099609375,
|
| 99219 |
+
"learning_rate": 8.941078172790893e-05,
|
| 99220 |
+
"loss": 2.4261,
|
| 99221 |
+
"step": 14173
|
| 99222 |
+
},
|
| 99223 |
+
{
|
| 99224 |
+
"epoch": 0.7874444444444444,
|
| 99225 |
+
"grad_norm": 0.10107421875,
|
| 99226 |
+
"learning_rate": 8.936589603983323e-05,
|
| 99227 |
+
"loss": 2.3865,
|
| 99228 |
+
"step": 14174
|
| 99229 |
+
},
|
| 99230 |
+
{
|
| 99231 |
+
"epoch": 0.7875,
|
| 99232 |
+
"grad_norm": 0.09912109375,
|
| 99233 |
+
"learning_rate": 8.932102020437709e-05,
|
| 99234 |
+
"loss": 2.3238,
|
| 99235 |
+
"step": 14175
|
| 99236 |
+
},
|
| 99237 |
+
{
|
| 99238 |
+
"epoch": 0.7875555555555556,
|
| 99239 |
+
"grad_norm": 0.09912109375,
|
| 99240 |
+
"learning_rate": 8.927615422296405e-05,
|
| 99241 |
+
"loss": 2.3572,
|
| 99242 |
+
"step": 14176
|
| 99243 |
+
},
|
| 99244 |
+
{
|
| 99245 |
+
"epoch": 0.7876111111111112,
|
| 99246 |
+
"grad_norm": 0.1025390625,
|
| 99247 |
+
"learning_rate": 8.923129809701696e-05,
|
| 99248 |
+
"loss": 2.4635,
|
| 99249 |
+
"step": 14177
|
| 99250 |
+
},
|
| 99251 |
+
{
|
| 99252 |
+
"epoch": 0.7876666666666666,
|
| 99253 |
+
"grad_norm": 0.1005859375,
|
| 99254 |
+
"learning_rate": 8.918645182795878e-05,
|
| 99255 |
+
"loss": 2.3734,
|
| 99256 |
+
"step": 14178
|
| 99257 |
+
},
|
| 99258 |
+
{
|
| 99259 |
+
"epoch": 0.7877222222222222,
|
| 99260 |
+
"grad_norm": 0.10107421875,
|
| 99261 |
+
"learning_rate": 8.914161541721165e-05,
|
| 99262 |
+
"loss": 2.3676,
|
| 99263 |
+
"step": 14179
|
| 99264 |
+
},
|
| 99265 |
+
{
|
| 99266 |
+
"epoch": 0.7877777777777778,
|
| 99267 |
+
"grad_norm": 0.10205078125,
|
| 99268 |
+
"learning_rate": 8.909678886619794e-05,
|
| 99269 |
+
"loss": 2.4379,
|
| 99270 |
+
"step": 14180
|
| 99271 |
+
},
|
| 99272 |
+
{
|
| 99273 |
+
"epoch": 0.7878333333333334,
|
| 99274 |
+
"grad_norm": 0.10107421875,
|
| 99275 |
+
"learning_rate": 8.905197217633939e-05,
|
| 99276 |
+
"loss": 2.3756,
|
| 99277 |
+
"step": 14181
|
| 99278 |
+
},
|
| 99279 |
+
{
|
| 99280 |
+
"epoch": 0.7878888888888889,
|
| 99281 |
+
"grad_norm": 0.1015625,
|
| 99282 |
+
"learning_rate": 8.90071653490573e-05,
|
| 99283 |
+
"loss": 2.3932,
|
| 99284 |
+
"step": 14182
|
| 99285 |
+
},
|
| 99286 |
+
{
|
| 99287 |
+
"epoch": 0.7879444444444444,
|
| 99288 |
+
"grad_norm": 0.09814453125,
|
| 99289 |
+
"learning_rate": 8.896236838577312e-05,
|
| 99290 |
+
"loss": 2.2767,
|
| 99291 |
+
"step": 14183
|
| 99292 |
+
},
|
| 99293 |
+
{
|
| 99294 |
+
"epoch": 0.788,
|
| 99295 |
+
"grad_norm": 0.099609375,
|
| 99296 |
+
"learning_rate": 8.891758128790746e-05,
|
| 99297 |
+
"loss": 2.4661,
|
| 99298 |
+
"step": 14184
|
| 99299 |
+
},
|
| 99300 |
+
{
|
| 99301 |
+
"epoch": 0.7880555555555555,
|
| 99302 |
+
"grad_norm": 0.10107421875,
|
| 99303 |
+
"learning_rate": 8.887280405688105e-05,
|
| 99304 |
+
"loss": 2.3323,
|
| 99305 |
+
"step": 14185
|
| 99306 |
+
},
|
| 99307 |
+
{
|
| 99308 |
+
"epoch": 0.7881111111111111,
|
| 99309 |
+
"grad_norm": 0.1005859375,
|
| 99310 |
+
"learning_rate": 8.882803669411411e-05,
|
| 99311 |
+
"loss": 2.3166,
|
| 99312 |
+
"step": 14186
|
| 99313 |
+
},
|
| 99314 |
+
{
|
| 99315 |
+
"epoch": 0.7881666666666667,
|
| 99316 |
+
"grad_norm": 0.099609375,
|
| 99317 |
+
"learning_rate": 8.878327920102636e-05,
|
| 99318 |
+
"loss": 2.3755,
|
| 99319 |
+
"step": 14187
|
| 99320 |
+
},
|
| 99321 |
+
{
|
| 99322 |
+
"epoch": 0.7882222222222223,
|
| 99323 |
+
"grad_norm": 0.10400390625,
|
| 99324 |
+
"learning_rate": 8.873853157903753e-05,
|
| 99325 |
+
"loss": 2.543,
|
| 99326 |
+
"step": 14188
|
| 99327 |
+
},
|
| 99328 |
+
{
|
| 99329 |
+
"epoch": 0.7882777777777777,
|
| 99330 |
+
"grad_norm": 0.0986328125,
|
| 99331 |
+
"learning_rate": 8.86937938295671e-05,
|
| 99332 |
+
"loss": 2.2914,
|
| 99333 |
+
"step": 14189
|
| 99334 |
+
},
|
| 99335 |
+
{
|
| 99336 |
+
"epoch": 0.7883333333333333,
|
| 99337 |
+
"grad_norm": 0.10009765625,
|
| 99338 |
+
"learning_rate": 8.864906595403386e-05,
|
| 99339 |
+
"loss": 2.3119,
|
| 99340 |
+
"step": 14190
|
| 99341 |
+
},
|
| 99342 |
+
{
|
| 99343 |
+
"epoch": 0.7883888888888889,
|
| 99344 |
+
"grad_norm": 0.09912109375,
|
| 99345 |
+
"learning_rate": 8.86043479538564e-05,
|
| 99346 |
+
"loss": 2.3723,
|
| 99347 |
+
"step": 14191
|
| 99348 |
+
},
|
| 99349 |
+
{
|
| 99350 |
+
"epoch": 0.7884444444444444,
|
| 99351 |
+
"grad_norm": 0.10107421875,
|
| 99352 |
+
"learning_rate": 8.855963983045326e-05,
|
| 99353 |
+
"loss": 2.5,
|
| 99354 |
+
"step": 14192
|
| 99355 |
+
},
|
| 99356 |
+
{
|
| 99357 |
+
"epoch": 0.7885,
|
| 99358 |
+
"grad_norm": 0.10205078125,
|
| 99359 |
+
"learning_rate": 8.851494158524243e-05,
|
| 99360 |
+
"loss": 2.3008,
|
| 99361 |
+
"step": 14193
|
| 99362 |
+
},
|
| 99363 |
+
{
|
| 99364 |
+
"epoch": 0.7885555555555556,
|
| 99365 |
+
"grad_norm": 0.10107421875,
|
| 99366 |
+
"learning_rate": 8.847025321964153e-05,
|
| 99367 |
+
"loss": 2.3757,
|
| 99368 |
+
"step": 14194
|
| 99369 |
+
},
|
| 99370 |
+
{
|
| 99371 |
+
"epoch": 0.7886111111111112,
|
| 99372 |
+
"grad_norm": 0.10107421875,
|
| 99373 |
+
"learning_rate": 8.8425574735068e-05,
|
| 99374 |
+
"loss": 2.4253,
|
| 99375 |
+
"step": 14195
|
| 99376 |
+
},
|
| 99377 |
+
{
|
| 99378 |
+
"epoch": 0.7886666666666666,
|
| 99379 |
+
"grad_norm": 0.099609375,
|
| 99380 |
+
"learning_rate": 8.838090613293926e-05,
|
| 99381 |
+
"loss": 2.363,
|
| 99382 |
+
"step": 14196
|
| 99383 |
+
},
|
| 99384 |
+
{
|
| 99385 |
+
"epoch": 0.7887222222222222,
|
| 99386 |
+
"grad_norm": 0.099609375,
|
| 99387 |
+
"learning_rate": 8.833624741467161e-05,
|
| 99388 |
+
"loss": 2.3303,
|
| 99389 |
+
"step": 14197
|
| 99390 |
+
},
|
| 99391 |
+
{
|
| 99392 |
+
"epoch": 0.7887777777777778,
|
| 99393 |
+
"grad_norm": 0.09912109375,
|
| 99394 |
+
"learning_rate": 8.829159858168172e-05,
|
| 99395 |
+
"loss": 2.3244,
|
| 99396 |
+
"step": 14198
|
| 99397 |
+
},
|
| 99398 |
+
{
|
| 99399 |
+
"epoch": 0.7888333333333334,
|
| 99400 |
+
"grad_norm": 0.10205078125,
|
| 99401 |
+
"learning_rate": 8.824695963538592e-05,
|
| 99402 |
+
"loss": 2.4332,
|
| 99403 |
+
"step": 14199
|
| 99404 |
+
},
|
| 99405 |
+
{
|
| 99406 |
+
"epoch": 0.7888888888888889,
|
| 99407 |
+
"grad_norm": 0.09912109375,
|
| 99408 |
+
"learning_rate": 8.820233057719991e-05,
|
| 99409 |
+
"loss": 2.3842,
|
| 99410 |
+
"step": 14200
|
| 99411 |
+
},
|
| 99412 |
+
{
|
| 99413 |
+
"epoch": 0.7889444444444444,
|
| 99414 |
+
"grad_norm": 0.0986328125,
|
| 99415 |
+
"learning_rate": 8.815771140853915e-05,
|
| 99416 |
+
"loss": 2.2159,
|
| 99417 |
+
"step": 14201
|
| 99418 |
+
},
|
| 99419 |
+
{
|
| 99420 |
+
"epoch": 0.789,
|
| 99421 |
+
"grad_norm": 0.099609375,
|
| 99422 |
+
"learning_rate": 8.811310213081889e-05,
|
| 99423 |
+
"loss": 2.3086,
|
| 99424 |
+
"step": 14202
|
| 99425 |
+
},
|
| 99426 |
+
{
|
| 99427 |
+
"epoch": 0.7890555555555555,
|
| 99428 |
+
"grad_norm": 0.1015625,
|
| 99429 |
+
"learning_rate": 8.806850274545437e-05,
|
| 99430 |
+
"loss": 2.4468,
|
| 99431 |
+
"step": 14203
|
| 99432 |
+
},
|
| 99433 |
+
{
|
| 99434 |
+
"epoch": 0.7891111111111111,
|
| 99435 |
+
"grad_norm": 0.10205078125,
|
| 99436 |
+
"learning_rate": 8.802391325385966e-05,
|
| 99437 |
+
"loss": 2.4487,
|
| 99438 |
+
"step": 14204
|
| 99439 |
+
},
|
| 99440 |
+
{
|
| 99441 |
+
"epoch": 0.7891666666666667,
|
| 99442 |
+
"grad_norm": 0.10009765625,
|
| 99443 |
+
"learning_rate": 8.797933365744926e-05,
|
| 99444 |
+
"loss": 2.3813,
|
| 99445 |
+
"step": 14205
|
| 99446 |
+
},
|
| 99447 |
+
{
|
| 99448 |
+
"epoch": 0.7892222222222223,
|
| 99449 |
+
"grad_norm": 0.099609375,
|
| 99450 |
+
"learning_rate": 8.793476395763737e-05,
|
| 99451 |
+
"loss": 2.3455,
|
| 99452 |
+
"step": 14206
|
| 99453 |
+
},
|
| 99454 |
+
{
|
| 99455 |
+
"epoch": 0.7892777777777777,
|
| 99456 |
+
"grad_norm": 0.099609375,
|
| 99457 |
+
"learning_rate": 8.789020415583724e-05,
|
| 99458 |
+
"loss": 2.4121,
|
| 99459 |
+
"step": 14207
|
| 99460 |
+
},
|
| 99461 |
+
{
|
| 99462 |
+
"epoch": 0.7893333333333333,
|
| 99463 |
+
"grad_norm": 0.10107421875,
|
| 99464 |
+
"learning_rate": 8.784565425346238e-05,
|
| 99465 |
+
"loss": 2.4403,
|
| 99466 |
+
"step": 14208
|
| 99467 |
+
},
|
| 99468 |
+
{
|
| 99469 |
+
"epoch": 0.7893888888888889,
|
| 99470 |
+
"grad_norm": 0.10107421875,
|
| 99471 |
+
"learning_rate": 8.780111425192599e-05,
|
| 99472 |
+
"loss": 2.3688,
|
| 99473 |
+
"step": 14209
|
| 99474 |
+
},
|
| 99475 |
+
{
|
| 99476 |
+
"epoch": 0.7894444444444444,
|
| 99477 |
+
"grad_norm": 0.1005859375,
|
| 99478 |
+
"learning_rate": 8.775658415264057e-05,
|
| 99479 |
+
"loss": 2.3252,
|
| 99480 |
+
"step": 14210
|
| 99481 |
+
},
|
| 99482 |
+
{
|
| 99483 |
+
"epoch": 0.7895,
|
| 99484 |
+
"grad_norm": 0.10009765625,
|
| 99485 |
+
"learning_rate": 8.771206395701845e-05,
|
| 99486 |
+
"loss": 2.3916,
|
| 99487 |
+
"step": 14211
|
| 99488 |
+
},
|
| 99489 |
+
{
|
| 99490 |
+
"epoch": 0.7895555555555556,
|
| 99491 |
+
"grad_norm": 0.10009765625,
|
| 99492 |
+
"learning_rate": 8.766755366647181e-05,
|
| 99493 |
+
"loss": 2.3584,
|
| 99494 |
+
"step": 14212
|
| 99495 |
+
},
|
| 99496 |
+
{
|
| 99497 |
+
"epoch": 0.7896111111111112,
|
| 99498 |
+
"grad_norm": 0.09814453125,
|
| 99499 |
+
"learning_rate": 8.762305328241271e-05,
|
| 99500 |
+
"loss": 2.3371,
|
| 99501 |
+
"step": 14213
|
| 99502 |
+
},
|
| 99503 |
+
{
|
| 99504 |
+
"epoch": 0.7896666666666666,
|
| 99505 |
+
"grad_norm": 0.0986328125,
|
| 99506 |
+
"learning_rate": 8.757856280625208e-05,
|
| 99507 |
+
"loss": 2.3202,
|
| 99508 |
+
"step": 14214
|
| 99509 |
+
},
|
| 99510 |
+
{
|
| 99511 |
+
"epoch": 0.7897222222222222,
|
| 99512 |
+
"grad_norm": 0.1015625,
|
| 99513 |
+
"learning_rate": 8.753408223940129e-05,
|
| 99514 |
+
"loss": 2.3664,
|
| 99515 |
+
"step": 14215
|
| 99516 |
+
},
|
| 99517 |
+
{
|
| 99518 |
+
"epoch": 0.7897777777777778,
|
| 99519 |
+
"grad_norm": 0.103515625,
|
| 99520 |
+
"learning_rate": 8.748961158327129e-05,
|
| 99521 |
+
"loss": 2.4754,
|
| 99522 |
+
"step": 14216
|
| 99523 |
+
},
|
| 99524 |
+
{
|
| 99525 |
+
"epoch": 0.7898333333333334,
|
| 99526 |
+
"grad_norm": 0.103515625,
|
| 99527 |
+
"learning_rate": 8.74451508392725e-05,
|
| 99528 |
+
"loss": 2.4473,
|
| 99529 |
+
"step": 14217
|
| 99530 |
+
},
|
| 99531 |
+
{
|
| 99532 |
+
"epoch": 0.7898888888888889,
|
| 99533 |
+
"grad_norm": 0.0986328125,
|
| 99534 |
+
"learning_rate": 8.740070000881498e-05,
|
| 99535 |
+
"loss": 2.3261,
|
| 99536 |
+
"step": 14218
|
| 99537 |
+
},
|
| 99538 |
+
{
|
| 99539 |
+
"epoch": 0.7899444444444444,
|
| 99540 |
+
"grad_norm": 0.10009765625,
|
| 99541 |
+
"learning_rate": 8.73562590933088e-05,
|
| 99542 |
+
"loss": 2.3823,
|
| 99543 |
+
"step": 14219
|
| 99544 |
+
},
|
| 99545 |
+
{
|
| 99546 |
+
"epoch": 0.79,
|
| 99547 |
+
"grad_norm": 0.099609375,
|
| 99548 |
+
"learning_rate": 8.731182809416348e-05,
|
| 99549 |
+
"loss": 2.3239,
|
| 99550 |
+
"step": 14220
|
| 99551 |
+
},
|
| 99552 |
+
{
|
| 99553 |
+
"epoch": 0.7900555555555555,
|
| 99554 |
+
"grad_norm": 0.10302734375,
|
| 99555 |
+
"learning_rate": 8.726740701278809e-05,
|
| 99556 |
+
"loss": 2.4208,
|
| 99557 |
+
"step": 14221
|
| 99558 |
+
},
|
| 99559 |
+
{
|
| 99560 |
+
"epoch": 0.7901111111111111,
|
| 99561 |
+
"grad_norm": 0.099609375,
|
| 99562 |
+
"learning_rate": 8.722299585059177e-05,
|
| 99563 |
+
"loss": 2.3869,
|
| 99564 |
+
"step": 14222
|
| 99565 |
+
},
|
| 99566 |
+
{
|
| 99567 |
+
"epoch": 0.7901666666666667,
|
| 99568 |
+
"grad_norm": 0.099609375,
|
| 99569 |
+
"learning_rate": 8.717859460898318e-05,
|
| 99570 |
+
"loss": 2.3926,
|
| 99571 |
+
"step": 14223
|
| 99572 |
+
},
|
| 99573 |
+
{
|
| 99574 |
+
"epoch": 0.7902222222222223,
|
| 99575 |
+
"grad_norm": 0.0986328125,
|
| 99576 |
+
"learning_rate": 8.713420328937054e-05,
|
| 99577 |
+
"loss": 2.3418,
|
| 99578 |
+
"step": 14224
|
| 99579 |
+
},
|
| 99580 |
+
{
|
| 99581 |
+
"epoch": 0.7902777777777777,
|
| 99582 |
+
"grad_norm": 0.10009765625,
|
| 99583 |
+
"learning_rate": 8.708982189316173e-05,
|
| 99584 |
+
"loss": 2.309,
|
| 99585 |
+
"step": 14225
|
| 99586 |
+
},
|
| 99587 |
+
{
|
| 99588 |
+
"epoch": 0.7903333333333333,
|
| 99589 |
+
"grad_norm": 0.1005859375,
|
| 99590 |
+
"learning_rate": 8.704545042176468e-05,
|
| 99591 |
+
"loss": 2.3225,
|
| 99592 |
+
"step": 14226
|
| 99593 |
+
},
|
| 99594 |
+
{
|
| 99595 |
+
"epoch": 0.7903888888888889,
|
| 99596 |
+
"grad_norm": 0.10009765625,
|
| 99597 |
+
"learning_rate": 8.70010888765866e-05,
|
| 99598 |
+
"loss": 2.4848,
|
| 99599 |
+
"step": 14227
|
| 99600 |
+
},
|
| 99601 |
+
{
|
| 99602 |
+
"epoch": 0.7904444444444444,
|
| 99603 |
+
"grad_norm": 0.0986328125,
|
| 99604 |
+
"learning_rate": 8.695673725903443e-05,
|
| 99605 |
+
"loss": 2.3313,
|
| 99606 |
+
"step": 14228
|
| 99607 |
+
},
|
| 99608 |
+
{
|
| 99609 |
+
"epoch": 0.7905,
|
| 99610 |
+
"grad_norm": 0.10009765625,
|
| 99611 |
+
"learning_rate": 8.691239557051516e-05,
|
| 99612 |
+
"loss": 2.3843,
|
| 99613 |
+
"step": 14229
|
| 99614 |
+
},
|
| 99615 |
+
{
|
| 99616 |
+
"epoch": 0.7905555555555556,
|
| 99617 |
+
"grad_norm": 0.10009765625,
|
| 99618 |
+
"learning_rate": 8.686806381243497e-05,
|
| 99619 |
+
"loss": 2.4009,
|
| 99620 |
+
"step": 14230
|
| 99621 |
+
},
|
| 99622 |
+
{
|
| 99623 |
+
"epoch": 0.7906111111111112,
|
| 99624 |
+
"grad_norm": 0.09912109375,
|
| 99625 |
+
"learning_rate": 8.682374198620017e-05,
|
| 99626 |
+
"loss": 2.333,
|
| 99627 |
+
"step": 14231
|
| 99628 |
+
},
|
| 99629 |
+
{
|
| 99630 |
+
"epoch": 0.7906666666666666,
|
| 99631 |
+
"grad_norm": 0.103515625,
|
| 99632 |
+
"learning_rate": 8.677943009321636e-05,
|
| 99633 |
+
"loss": 2.4237,
|
| 99634 |
+
"step": 14232
|
| 99635 |
+
},
|
| 99636 |
+
{
|
| 99637 |
+
"epoch": 0.7907222222222222,
|
| 99638 |
+
"grad_norm": 0.099609375,
|
| 99639 |
+
"learning_rate": 8.673512813488924e-05,
|
| 99640 |
+
"loss": 2.3802,
|
| 99641 |
+
"step": 14233
|
| 99642 |
+
},
|
| 99643 |
+
{
|
| 99644 |
+
"epoch": 0.7907777777777778,
|
| 99645 |
+
"grad_norm": 0.09765625,
|
| 99646 |
+
"learning_rate": 8.669083611262378e-05,
|
| 99647 |
+
"loss": 2.2485,
|
| 99648 |
+
"step": 14234
|
| 99649 |
+
},
|
| 99650 |
+
{
|
| 99651 |
+
"epoch": 0.7908333333333334,
|
| 99652 |
+
"grad_norm": 0.099609375,
|
| 99653 |
+
"learning_rate": 8.664655402782482e-05,
|
| 99654 |
+
"loss": 2.3281,
|
| 99655 |
+
"step": 14235
|
| 99656 |
+
},
|
| 99657 |
+
{
|
| 99658 |
+
"epoch": 0.7908888888888889,
|
| 99659 |
+
"grad_norm": 0.0986328125,
|
| 99660 |
+
"learning_rate": 8.660228188189705e-05,
|
| 99661 |
+
"loss": 2.457,
|
| 99662 |
+
"step": 14236
|
| 99663 |
+
},
|
| 99664 |
+
{
|
| 99665 |
+
"epoch": 0.7909444444444444,
|
| 99666 |
+
"grad_norm": 0.10009765625,
|
| 99667 |
+
"learning_rate": 8.655801967624446e-05,
|
| 99668 |
+
"loss": 2.435,
|
| 99669 |
+
"step": 14237
|
| 99670 |
+
},
|
| 99671 |
+
{
|
| 99672 |
+
"epoch": 0.791,
|
| 99673 |
+
"grad_norm": 0.09912109375,
|
| 99674 |
+
"learning_rate": 8.651376741227117e-05,
|
| 99675 |
+
"loss": 2.2942,
|
| 99676 |
+
"step": 14238
|
| 99677 |
+
},
|
| 99678 |
+
{
|
| 99679 |
+
"epoch": 0.7910555555555555,
|
| 99680 |
+
"grad_norm": 0.10009765625,
|
| 99681 |
+
"learning_rate": 8.646952509138068e-05,
|
| 99682 |
+
"loss": 2.3699,
|
| 99683 |
+
"step": 14239
|
| 99684 |
+
},
|
| 99685 |
+
{
|
| 99686 |
+
"epoch": 0.7911111111111111,
|
| 99687 |
+
"grad_norm": 0.10009765625,
|
| 99688 |
+
"learning_rate": 8.64252927149761e-05,
|
| 99689 |
+
"loss": 2.3984,
|
| 99690 |
+
"step": 14240
|
| 99691 |
+
},
|
| 99692 |
+
{
|
| 99693 |
+
"epoch": 0.7911666666666667,
|
| 99694 |
+
"grad_norm": 0.1005859375,
|
| 99695 |
+
"learning_rate": 8.638107028446065e-05,
|
| 99696 |
+
"loss": 2.4197,
|
| 99697 |
+
"step": 14241
|
| 99698 |
+
},
|
| 99699 |
+
{
|
| 99700 |
+
"epoch": 0.7912222222222223,
|
| 99701 |
+
"grad_norm": 0.09912109375,
|
| 99702 |
+
"learning_rate": 8.633685780123678e-05,
|
| 99703 |
+
"loss": 2.3365,
|
| 99704 |
+
"step": 14242
|
| 99705 |
+
},
|
| 99706 |
+
{
|
| 99707 |
+
"epoch": 0.7912777777777777,
|
| 99708 |
+
"grad_norm": 0.10107421875,
|
| 99709 |
+
"learning_rate": 8.629265526670694e-05,
|
| 99710 |
+
"loss": 2.3084,
|
| 99711 |
+
"step": 14243
|
| 99712 |
+
},
|
| 99713 |
+
{
|
| 99714 |
+
"epoch": 0.7913333333333333,
|
| 99715 |
+
"grad_norm": 0.09814453125,
|
| 99716 |
+
"learning_rate": 8.624846268227295e-05,
|
| 99717 |
+
"loss": 2.2836,
|
| 99718 |
+
"step": 14244
|
| 99719 |
+
},
|
| 99720 |
+
{
|
| 99721 |
+
"epoch": 0.7913888888888889,
|
| 99722 |
+
"grad_norm": 0.099609375,
|
| 99723 |
+
"learning_rate": 8.620428004933674e-05,
|
| 99724 |
+
"loss": 2.3158,
|
| 99725 |
+
"step": 14245
|
| 99726 |
+
},
|
| 99727 |
+
{
|
| 99728 |
+
"epoch": 0.7914444444444444,
|
| 99729 |
+
"grad_norm": 0.0986328125,
|
| 99730 |
+
"learning_rate": 8.616010736929956e-05,
|
| 99731 |
+
"loss": 2.3667,
|
| 99732 |
+
"step": 14246
|
| 99733 |
+
},
|
| 99734 |
+
{
|
| 99735 |
+
"epoch": 0.7915,
|
| 99736 |
+
"grad_norm": 0.10107421875,
|
| 99737 |
+
"learning_rate": 8.611594464356235e-05,
|
| 99738 |
+
"loss": 2.3983,
|
| 99739 |
+
"step": 14247
|
| 99740 |
+
},
|
| 99741 |
+
{
|
| 99742 |
+
"epoch": 0.7915555555555556,
|
| 99743 |
+
"grad_norm": 0.099609375,
|
| 99744 |
+
"learning_rate": 8.60717918735261e-05,
|
| 99745 |
+
"loss": 2.3675,
|
| 99746 |
+
"step": 14248
|
| 99747 |
+
},
|
| 99748 |
+
{
|
| 99749 |
+
"epoch": 0.7916111111111112,
|
| 99750 |
+
"grad_norm": 0.09912109375,
|
| 99751 |
+
"learning_rate": 8.602764906059109e-05,
|
| 99752 |
+
"loss": 2.3188,
|
| 99753 |
+
"step": 14249
|
| 99754 |
+
},
|
| 99755 |
+
{
|
| 99756 |
+
"epoch": 0.7916666666666666,
|
| 99757 |
+
"grad_norm": 0.1005859375,
|
| 99758 |
+
"learning_rate": 8.598351620615735e-05,
|
| 99759 |
+
"loss": 2.3241,
|
| 99760 |
+
"step": 14250
|
| 99761 |
+
},
|
| 99762 |
+
{
|
| 99763 |
+
"epoch": 0.7917222222222222,
|
| 99764 |
+
"grad_norm": 0.10009765625,
|
| 99765 |
+
"learning_rate": 8.593939331162477e-05,
|
| 99766 |
+
"loss": 2.3462,
|
| 99767 |
+
"step": 14251
|
| 99768 |
+
},
|
| 99769 |
+
{
|
| 99770 |
+
"epoch": 0.7917777777777778,
|
| 99771 |
+
"grad_norm": 0.09814453125,
|
| 99772 |
+
"learning_rate": 8.589528037839296e-05,
|
| 99773 |
+
"loss": 2.2788,
|
| 99774 |
+
"step": 14252
|
| 99775 |
+
},
|
| 99776 |
+
{
|
| 99777 |
+
"epoch": 0.7918333333333333,
|
| 99778 |
+
"grad_norm": 0.10107421875,
|
| 99779 |
+
"learning_rate": 8.585117740786097e-05,
|
| 99780 |
+
"loss": 2.4101,
|
| 99781 |
+
"step": 14253
|
| 99782 |
+
},
|
| 99783 |
+
{
|
| 99784 |
+
"epoch": 0.7918888888888889,
|
| 99785 |
+
"grad_norm": 0.1015625,
|
| 99786 |
+
"learning_rate": 8.580708440142751e-05,
|
| 99787 |
+
"loss": 2.4325,
|
| 99788 |
+
"step": 14254
|
| 99789 |
+
},
|
| 99790 |
+
{
|
| 99791 |
+
"epoch": 0.7919444444444445,
|
| 99792 |
+
"grad_norm": 0.099609375,
|
| 99793 |
+
"learning_rate": 8.576300136049136e-05,
|
| 99794 |
+
"loss": 2.4042,
|
| 99795 |
+
"step": 14255
|
| 99796 |
+
},
|
| 99797 |
+
{
|
| 99798 |
+
"epoch": 0.792,
|
| 99799 |
+
"grad_norm": 0.09765625,
|
| 99800 |
+
"learning_rate": 8.57189282864506e-05,
|
| 99801 |
+
"loss": 2.3708,
|
| 99802 |
+
"step": 14256
|
| 99803 |
+
},
|
| 99804 |
+
{
|
| 99805 |
+
"epoch": 0.7920555555555555,
|
| 99806 |
+
"grad_norm": 0.0986328125,
|
| 99807 |
+
"learning_rate": 8.567486518070307e-05,
|
| 99808 |
+
"loss": 2.3483,
|
| 99809 |
+
"step": 14257
|
| 99810 |
+
},
|
| 99811 |
+
{
|
| 99812 |
+
"epoch": 0.7921111111111111,
|
| 99813 |
+
"grad_norm": 0.10107421875,
|
| 99814 |
+
"learning_rate": 8.563081204464637e-05,
|
| 99815 |
+
"loss": 2.306,
|
| 99816 |
+
"step": 14258
|
| 99817 |
+
},
|
| 99818 |
+
{
|
| 99819 |
+
"epoch": 0.7921666666666667,
|
| 99820 |
+
"grad_norm": 0.10009765625,
|
| 99821 |
+
"learning_rate": 8.55867688796781e-05,
|
| 99822 |
+
"loss": 2.4136,
|
| 99823 |
+
"step": 14259
|
| 99824 |
+
},
|
| 99825 |
+
{
|
| 99826 |
+
"epoch": 0.7922222222222223,
|
| 99827 |
+
"grad_norm": 0.1005859375,
|
| 99828 |
+
"learning_rate": 8.554273568719469e-05,
|
| 99829 |
+
"loss": 2.4277,
|
| 99830 |
+
"step": 14260
|
| 99831 |
+
},
|
| 99832 |
+
{
|
| 99833 |
+
"epoch": 0.7922777777777777,
|
| 99834 |
+
"grad_norm": 0.1025390625,
|
| 99835 |
+
"learning_rate": 8.549871246859305e-05,
|
| 99836 |
+
"loss": 2.4098,
|
| 99837 |
+
"step": 14261
|
| 99838 |
+
},
|
| 99839 |
+
{
|
| 99840 |
+
"epoch": 0.7923333333333333,
|
| 99841 |
+
"grad_norm": 0.09814453125,
|
| 99842 |
+
"learning_rate": 8.545469922526952e-05,
|
| 99843 |
+
"loss": 2.3387,
|
| 99844 |
+
"step": 14262
|
| 99845 |
+
},
|
| 99846 |
+
{
|
| 99847 |
+
"epoch": 0.7923888888888889,
|
| 99848 |
+
"grad_norm": 0.099609375,
|
| 99849 |
+
"learning_rate": 8.54106959586201e-05,
|
| 99850 |
+
"loss": 2.2999,
|
| 99851 |
+
"step": 14263
|
| 99852 |
+
},
|
| 99853 |
+
{
|
| 99854 |
+
"epoch": 0.7924444444444444,
|
| 99855 |
+
"grad_norm": 0.10009765625,
|
| 99856 |
+
"learning_rate": 8.53667026700403e-05,
|
| 99857 |
+
"loss": 2.2989,
|
| 99858 |
+
"step": 14264
|
| 99859 |
+
},
|
| 99860 |
+
{
|
| 99861 |
+
"epoch": 0.7925,
|
| 99862 |
+
"grad_norm": 0.09912109375,
|
| 99863 |
+
"learning_rate": 8.532271936092575e-05,
|
| 99864 |
+
"loss": 2.292,
|
| 99865 |
+
"step": 14265
|
| 99866 |
+
},
|
| 99867 |
+
{
|
| 99868 |
+
"epoch": 0.7925555555555556,
|
| 99869 |
+
"grad_norm": 0.0986328125,
|
| 99870 |
+
"learning_rate": 8.527874603267134e-05,
|
| 99871 |
+
"loss": 2.3629,
|
| 99872 |
+
"step": 14266
|
| 99873 |
+
},
|
| 99874 |
+
{
|
| 99875 |
+
"epoch": 0.7926111111111112,
|
| 99876 |
+
"grad_norm": 0.10009765625,
|
| 99877 |
+
"learning_rate": 8.523478268667169e-05,
|
| 99878 |
+
"loss": 2.4197,
|
| 99879 |
+
"step": 14267
|
| 99880 |
+
},
|
| 99881 |
+
{
|
| 99882 |
+
"epoch": 0.7926666666666666,
|
| 99883 |
+
"grad_norm": 0.10009765625,
|
| 99884 |
+
"learning_rate": 8.51908293243214e-05,
|
| 99885 |
+
"loss": 2.4142,
|
| 99886 |
+
"step": 14268
|
| 99887 |
+
},
|
| 99888 |
+
{
|
| 99889 |
+
"epoch": 0.7927222222222222,
|
| 99890 |
+
"grad_norm": 0.099609375,
|
| 99891 |
+
"learning_rate": 8.514688594701463e-05,
|
| 99892 |
+
"loss": 2.2871,
|
| 99893 |
+
"step": 14269
|
| 99894 |
+
},
|
| 99895 |
+
{
|
| 99896 |
+
"epoch": 0.7927777777777778,
|
| 99897 |
+
"grad_norm": 0.09912109375,
|
| 99898 |
+
"learning_rate": 8.510295255614509e-05,
|
| 99899 |
+
"loss": 2.3324,
|
| 99900 |
+
"step": 14270
|
| 99901 |
+
},
|
| 99902 |
+
{
|
| 99903 |
+
"epoch": 0.7928333333333333,
|
| 99904 |
+
"grad_norm": 0.09912109375,
|
| 99905 |
+
"learning_rate": 8.50590291531061e-05,
|
| 99906 |
+
"loss": 2.3361,
|
| 99907 |
+
"step": 14271
|
| 99908 |
+
},
|
| 99909 |
+
{
|
| 99910 |
+
"epoch": 0.7928888888888889,
|
| 99911 |
+
"grad_norm": 0.0986328125,
|
| 99912 |
+
"learning_rate": 8.501511573929106e-05,
|
| 99913 |
+
"loss": 2.2668,
|
| 99914 |
+
"step": 14272
|
| 99915 |
+
},
|
| 99916 |
+
{
|
| 99917 |
+
"epoch": 0.7929444444444445,
|
| 99918 |
+
"grad_norm": 0.099609375,
|
| 99919 |
+
"learning_rate": 8.497121231609271e-05,
|
| 99920 |
+
"loss": 2.434,
|
| 99921 |
+
"step": 14273
|
| 99922 |
+
},
|
| 99923 |
+
{
|
| 99924 |
+
"epoch": 0.793,
|
| 99925 |
+
"grad_norm": 0.1025390625,
|
| 99926 |
+
"learning_rate": 8.49273188849034e-05,
|
| 99927 |
+
"loss": 2.4423,
|
| 99928 |
+
"step": 14274
|
| 99929 |
+
},
|
| 99930 |
+
{
|
| 99931 |
+
"epoch": 0.7930555555555555,
|
| 99932 |
+
"grad_norm": 0.0986328125,
|
| 99933 |
+
"learning_rate": 8.488343544711561e-05,
|
| 99934 |
+
"loss": 2.3806,
|
| 99935 |
+
"step": 14275
|
| 99936 |
+
},
|
| 99937 |
+
{
|
| 99938 |
+
"epoch": 0.7931111111111111,
|
| 99939 |
+
"grad_norm": 0.09814453125,
|
| 99940 |
+
"learning_rate": 8.483956200412095e-05,
|
| 99941 |
+
"loss": 2.2687,
|
| 99942 |
+
"step": 14276
|
| 99943 |
+
},
|
| 99944 |
+
{
|
| 99945 |
+
"epoch": 0.7931666666666667,
|
| 99946 |
+
"grad_norm": 0.0986328125,
|
| 99947 |
+
"learning_rate": 8.479569855731124e-05,
|
| 99948 |
+
"loss": 2.3898,
|
| 99949 |
+
"step": 14277
|
| 99950 |
+
},
|
| 99951 |
+
{
|
| 99952 |
+
"epoch": 0.7932222222222223,
|
| 99953 |
+
"grad_norm": 0.10009765625,
|
| 99954 |
+
"learning_rate": 8.475184510807754e-05,
|
| 99955 |
+
"loss": 2.3479,
|
| 99956 |
+
"step": 14278
|
| 99957 |
+
},
|
| 99958 |
+
{
|
| 99959 |
+
"epoch": 0.7932777777777777,
|
| 99960 |
+
"grad_norm": 0.0986328125,
|
| 99961 |
+
"learning_rate": 8.470800165781096e-05,
|
| 99962 |
+
"loss": 2.3262,
|
| 99963 |
+
"step": 14279
|
| 99964 |
+
},
|
| 99965 |
+
{
|
| 99966 |
+
"epoch": 0.7933333333333333,
|
| 99967 |
+
"grad_norm": 0.1005859375,
|
| 99968 |
+
"learning_rate": 8.4664168207902e-05,
|
| 99969 |
+
"loss": 2.3864,
|
| 99970 |
+
"step": 14280
|
| 99971 |
+
},
|
| 99972 |
+
{
|
| 99973 |
+
"epoch": 0.7933888888888889,
|
| 99974 |
+
"grad_norm": 0.099609375,
|
| 99975 |
+
"learning_rate": 8.46203447597409e-05,
|
| 99976 |
+
"loss": 2.2974,
|
| 99977 |
+
"step": 14281
|
| 99978 |
+
},
|
| 99979 |
+
{
|
| 99980 |
+
"epoch": 0.7934444444444444,
|
| 99981 |
+
"grad_norm": 0.0986328125,
|
| 99982 |
+
"learning_rate": 8.45765313147178e-05,
|
| 99983 |
+
"loss": 2.2832,
|
| 99984 |
+
"step": 14282
|
| 99985 |
+
},
|
| 99986 |
+
{
|
| 99987 |
+
"epoch": 0.7935,
|
| 99988 |
+
"grad_norm": 0.099609375,
|
| 99989 |
+
"learning_rate": 8.45327278742222e-05,
|
| 99990 |
+
"loss": 2.3436,
|
| 99991 |
+
"step": 14283
|
| 99992 |
+
},
|
| 99993 |
+
{
|
| 99994 |
+
"epoch": 0.7935555555555556,
|
| 99995 |
+
"grad_norm": 0.0986328125,
|
| 99996 |
+
"learning_rate": 8.44889344396436e-05,
|
| 99997 |
+
"loss": 2.3377,
|
| 99998 |
+
"step": 14284
|
| 99999 |
+
},
|
| 100000 |
+
{
|
| 100001 |
+
"epoch": 0.7936111111111112,
|
| 100002 |
+
"grad_norm": 0.099609375,
|
| 100003 |
+
"learning_rate": 8.444515101237098e-05,
|
| 100004 |
+
"loss": 2.3826,
|
| 100005 |
+
"step": 14285
|
| 100006 |
+
},
|
| 100007 |
+
{
|
| 100008 |
+
"epoch": 0.7936666666666666,
|
| 100009 |
+
"grad_norm": 0.09912109375,
|
| 100010 |
+
"learning_rate": 8.440137759379294e-05,
|
| 100011 |
+
"loss": 2.3593,
|
| 100012 |
+
"step": 14286
|
| 100013 |
+
},
|
| 100014 |
+
{
|
| 100015 |
+
"epoch": 0.7937222222222222,
|
| 100016 |
+
"grad_norm": 0.10107421875,
|
| 100017 |
+
"learning_rate": 8.435761418529811e-05,
|
| 100018 |
+
"loss": 2.2827,
|
| 100019 |
+
"step": 14287
|
| 100020 |
+
},
|
| 100021 |
+
{
|
| 100022 |
+
"epoch": 0.7937777777777778,
|
| 100023 |
+
"grad_norm": 0.09814453125,
|
| 100024 |
+
"learning_rate": 8.431386078827425e-05,
|
| 100025 |
+
"loss": 2.2985,
|
| 100026 |
+
"step": 14288
|
| 100027 |
+
},
|
| 100028 |
+
{
|
| 100029 |
+
"epoch": 0.7938333333333333,
|
| 100030 |
+
"grad_norm": 0.09912109375,
|
| 100031 |
+
"learning_rate": 8.427011740410944e-05,
|
| 100032 |
+
"loss": 2.3443,
|
| 100033 |
+
"step": 14289
|
| 100034 |
+
},
|
| 100035 |
+
{
|
| 100036 |
+
"epoch": 0.7938888888888889,
|
| 100037 |
+
"grad_norm": 0.09912109375,
|
| 100038 |
+
"learning_rate": 8.422638403419086e-05,
|
| 100039 |
+
"loss": 2.3745,
|
| 100040 |
+
"step": 14290
|
| 100041 |
+
},
|
| 100042 |
+
{
|
| 100043 |
+
"epoch": 0.7939444444444445,
|
| 100044 |
+
"grad_norm": 0.0986328125,
|
| 100045 |
+
"learning_rate": 8.418266067990587e-05,
|
| 100046 |
+
"loss": 2.2997,
|
| 100047 |
+
"step": 14291
|
| 100048 |
+
},
|
| 100049 |
+
{
|
| 100050 |
+
"epoch": 0.794,
|
| 100051 |
+
"grad_norm": 0.10107421875,
|
| 100052 |
+
"learning_rate": 8.413894734264114e-05,
|
| 100053 |
+
"loss": 2.4442,
|
| 100054 |
+
"step": 14292
|
| 100055 |
+
},
|
| 100056 |
+
{
|
| 100057 |
+
"epoch": 0.7940555555555555,
|
| 100058 |
+
"grad_norm": 0.099609375,
|
| 100059 |
+
"learning_rate": 8.409524402378309e-05,
|
| 100060 |
+
"loss": 2.4084,
|
| 100061 |
+
"step": 14293
|
| 100062 |
+
},
|
| 100063 |
+
{
|
| 100064 |
+
"epoch": 0.7941111111111111,
|
| 100065 |
+
"grad_norm": 0.09912109375,
|
| 100066 |
+
"learning_rate": 8.405155072471807e-05,
|
| 100067 |
+
"loss": 2.344,
|
| 100068 |
+
"step": 14294
|
| 100069 |
+
},
|
| 100070 |
+
{
|
| 100071 |
+
"epoch": 0.7941666666666667,
|
| 100072 |
+
"grad_norm": 0.0986328125,
|
| 100073 |
+
"learning_rate": 8.400786744683186e-05,
|
| 100074 |
+
"loss": 2.3329,
|
| 100075 |
+
"step": 14295
|
| 100076 |
+
},
|
| 100077 |
+
{
|
| 100078 |
+
"epoch": 0.7942222222222223,
|
| 100079 |
+
"grad_norm": 0.09912109375,
|
| 100080 |
+
"learning_rate": 8.396419419150988e-05,
|
| 100081 |
+
"loss": 2.283,
|
| 100082 |
+
"step": 14296
|
| 100083 |
+
},
|
| 100084 |
+
{
|
| 100085 |
+
"epoch": 0.7942777777777777,
|
| 100086 |
+
"grad_norm": 0.09912109375,
|
| 100087 |
+
"learning_rate": 8.392053096013745e-05,
|
| 100088 |
+
"loss": 2.3017,
|
| 100089 |
+
"step": 14297
|
| 100090 |
+
},
|
| 100091 |
+
{
|
| 100092 |
+
"epoch": 0.7943333333333333,
|
| 100093 |
+
"grad_norm": 0.1005859375,
|
| 100094 |
+
"learning_rate": 8.387687775409956e-05,
|
| 100095 |
+
"loss": 2.4177,
|
| 100096 |
+
"step": 14298
|
| 100097 |
+
},
|
| 100098 |
+
{
|
| 100099 |
+
"epoch": 0.7943888888888889,
|
| 100100 |
+
"grad_norm": 0.09912109375,
|
| 100101 |
+
"learning_rate": 8.383323457478071e-05,
|
| 100102 |
+
"loss": 2.4014,
|
| 100103 |
+
"step": 14299
|
| 100104 |
+
},
|
| 100105 |
+
{
|
| 100106 |
+
"epoch": 0.7944444444444444,
|
| 100107 |
+
"grad_norm": 0.0986328125,
|
| 100108 |
+
"learning_rate": 8.378960142356506e-05,
|
| 100109 |
+
"loss": 2.2986,
|
| 100110 |
+
"step": 14300
|
| 100111 |
+
},
|
| 100112 |
+
{
|
| 100113 |
+
"epoch": 0.7945,
|
| 100114 |
+
"grad_norm": 0.099609375,
|
| 100115 |
+
"learning_rate": 8.374597830183675e-05,
|
| 100116 |
+
"loss": 2.3829,
|
| 100117 |
+
"step": 14301
|
| 100118 |
+
},
|
| 100119 |
+
{
|
| 100120 |
+
"epoch": 0.7945555555555556,
|
| 100121 |
+
"grad_norm": 0.10009765625,
|
| 100122 |
+
"learning_rate": 8.370236521097932e-05,
|
| 100123 |
+
"loss": 2.3635,
|
| 100124 |
+
"step": 14302
|
| 100125 |
+
},
|
| 100126 |
+
{
|
| 100127 |
+
"epoch": 0.7946111111111112,
|
| 100128 |
+
"grad_norm": 0.10009765625,
|
| 100129 |
+
"learning_rate": 8.365876215237594e-05,
|
| 100130 |
+
"loss": 2.4118,
|
| 100131 |
+
"step": 14303
|
| 100132 |
+
},
|
| 100133 |
+
{
|
| 100134 |
+
"epoch": 0.7946666666666666,
|
| 100135 |
+
"grad_norm": 0.10302734375,
|
| 100136 |
+
"learning_rate": 8.361516912740971e-05,
|
| 100137 |
+
"loss": 2.4585,
|
| 100138 |
+
"step": 14304
|
| 100139 |
+
},
|
| 100140 |
+
{
|
| 100141 |
+
"epoch": 0.7947222222222222,
|
| 100142 |
+
"grad_norm": 0.10009765625,
|
| 100143 |
+
"learning_rate": 8.357158613746361e-05,
|
| 100144 |
+
"loss": 2.4084,
|
| 100145 |
+
"step": 14305
|
| 100146 |
+
},
|
| 100147 |
+
{
|
| 100148 |
+
"epoch": 0.7947777777777778,
|
| 100149 |
+
"grad_norm": 0.0986328125,
|
| 100150 |
+
"learning_rate": 8.352801318391943e-05,
|
| 100151 |
+
"loss": 2.2745,
|
| 100152 |
+
"step": 14306
|
| 100153 |
+
},
|
| 100154 |
+
{
|
| 100155 |
+
"epoch": 0.7948333333333333,
|
| 100156 |
+
"grad_norm": 0.09912109375,
|
| 100157 |
+
"learning_rate": 8.348445026815954e-05,
|
| 100158 |
+
"loss": 2.2908,
|
| 100159 |
+
"step": 14307
|
| 100160 |
+
},
|
| 100161 |
+
{
|
| 100162 |
+
"epoch": 0.7948888888888889,
|
| 100163 |
+
"grad_norm": 0.099609375,
|
| 100164 |
+
"learning_rate": 8.344089739156568e-05,
|
| 100165 |
+
"loss": 2.2923,
|
| 100166 |
+
"step": 14308
|
| 100167 |
+
},
|
| 100168 |
+
{
|
| 100169 |
+
"epoch": 0.7949444444444445,
|
| 100170 |
+
"grad_norm": 0.10107421875,
|
| 100171 |
+
"learning_rate": 8.339735455551916e-05,
|
| 100172 |
+
"loss": 2.4101,
|
| 100173 |
+
"step": 14309
|
| 100174 |
+
},
|
| 100175 |
+
{
|
| 100176 |
+
"epoch": 0.795,
|
| 100177 |
+
"grad_norm": 0.10107421875,
|
| 100178 |
+
"learning_rate": 8.335382176140099e-05,
|
| 100179 |
+
"loss": 2.4213,
|
| 100180 |
+
"step": 14310
|
| 100181 |
+
},
|
| 100182 |
+
{
|
| 100183 |
+
"epoch": 0.7950555555555555,
|
| 100184 |
+
"grad_norm": 0.09765625,
|
| 100185 |
+
"learning_rate": 8.331029901059215e-05,
|
| 100186 |
+
"loss": 2.3111,
|
| 100187 |
+
"step": 14311
|
| 100188 |
+
},
|
| 100189 |
+
{
|
| 100190 |
+
"epoch": 0.7951111111111111,
|
| 100191 |
+
"grad_norm": 0.099609375,
|
| 100192 |
+
"learning_rate": 8.32667863044729e-05,
|
| 100193 |
+
"loss": 2.2821,
|
| 100194 |
+
"step": 14312
|
| 100195 |
+
},
|
| 100196 |
+
{
|
| 100197 |
+
"epoch": 0.7951666666666667,
|
| 100198 |
+
"grad_norm": 0.10009765625,
|
| 100199 |
+
"learning_rate": 8.322328364442334e-05,
|
| 100200 |
+
"loss": 2.2831,
|
| 100201 |
+
"step": 14313
|
| 100202 |
+
},
|
| 100203 |
+
{
|
| 100204 |
+
"epoch": 0.7952222222222223,
|
| 100205 |
+
"grad_norm": 0.099609375,
|
| 100206 |
+
"learning_rate": 8.317979103182336e-05,
|
| 100207 |
+
"loss": 2.3548,
|
| 100208 |
+
"step": 14314
|
| 100209 |
+
},
|
| 100210 |
+
{
|
| 100211 |
+
"epoch": 0.7952777777777778,
|
| 100212 |
+
"grad_norm": 0.1015625,
|
| 100213 |
+
"learning_rate": 8.313630846805267e-05,
|
| 100214 |
+
"loss": 2.4243,
|
| 100215 |
+
"step": 14315
|
| 100216 |
+
},
|
| 100217 |
+
{
|
| 100218 |
+
"epoch": 0.7953333333333333,
|
| 100219 |
+
"grad_norm": 0.099609375,
|
| 100220 |
+
"learning_rate": 8.309283595448998e-05,
|
| 100221 |
+
"loss": 2.3774,
|
| 100222 |
+
"step": 14316
|
| 100223 |
+
},
|
| 100224 |
+
{
|
| 100225 |
+
"epoch": 0.7953888888888889,
|
| 100226 |
+
"grad_norm": 0.099609375,
|
| 100227 |
+
"learning_rate": 8.304937349251437e-05,
|
| 100228 |
+
"loss": 2.3476,
|
| 100229 |
+
"step": 14317
|
| 100230 |
+
},
|
| 100231 |
+
{
|
| 100232 |
+
"epoch": 0.7954444444444444,
|
| 100233 |
+
"grad_norm": 0.10107421875,
|
| 100234 |
+
"learning_rate": 8.30059210835045e-05,
|
| 100235 |
+
"loss": 2.3572,
|
| 100236 |
+
"step": 14318
|
| 100237 |
+
},
|
| 100238 |
+
{
|
| 100239 |
+
"epoch": 0.7955,
|
| 100240 |
+
"grad_norm": 0.1005859375,
|
| 100241 |
+
"learning_rate": 8.296247872883843e-05,
|
| 100242 |
+
"loss": 2.3289,
|
| 100243 |
+
"step": 14319
|
| 100244 |
+
},
|
| 100245 |
+
{
|
| 100246 |
+
"epoch": 0.7955555555555556,
|
| 100247 |
+
"grad_norm": 0.1005859375,
|
| 100248 |
+
"learning_rate": 8.291904642989404e-05,
|
| 100249 |
+
"loss": 2.3482,
|
| 100250 |
+
"step": 14320
|
| 100251 |
+
},
|
| 100252 |
+
{
|
| 100253 |
+
"epoch": 0.7956111111111112,
|
| 100254 |
+
"grad_norm": 0.099609375,
|
| 100255 |
+
"learning_rate": 8.287562418804897e-05,
|
| 100256 |
+
"loss": 2.3875,
|
| 100257 |
+
"step": 14321
|
| 100258 |
+
},
|
| 100259 |
+
{
|
| 100260 |
+
"epoch": 0.7956666666666666,
|
| 100261 |
+
"grad_norm": 0.1005859375,
|
| 100262 |
+
"learning_rate": 8.283221200468054e-05,
|
| 100263 |
+
"loss": 2.3832,
|
| 100264 |
+
"step": 14322
|
| 100265 |
+
},
|
| 100266 |
+
{
|
| 100267 |
+
"epoch": 0.7957222222222222,
|
| 100268 |
+
"grad_norm": 0.0986328125,
|
| 100269 |
+
"learning_rate": 8.278880988116546e-05,
|
| 100270 |
+
"loss": 2.333,
|
| 100271 |
+
"step": 14323
|
| 100272 |
+
},
|
| 100273 |
+
{
|
| 100274 |
+
"epoch": 0.7957777777777778,
|
| 100275 |
+
"grad_norm": 0.09912109375,
|
| 100276 |
+
"learning_rate": 8.27454178188805e-05,
|
| 100277 |
+
"loss": 2.2882,
|
| 100278 |
+
"step": 14324
|
| 100279 |
+
},
|
| 100280 |
+
{
|
| 100281 |
+
"epoch": 0.7958333333333333,
|
| 100282 |
+
"grad_norm": 0.099609375,
|
| 100283 |
+
"learning_rate": 8.270203581920206e-05,
|
| 100284 |
+
"loss": 2.3341,
|
| 100285 |
+
"step": 14325
|
| 100286 |
+
},
|
| 100287 |
+
{
|
| 100288 |
+
"epoch": 0.7958888888888889,
|
| 100289 |
+
"grad_norm": 0.1005859375,
|
| 100290 |
+
"learning_rate": 8.265866388350598e-05,
|
| 100291 |
+
"loss": 2.293,
|
| 100292 |
+
"step": 14326
|
| 100293 |
+
},
|
| 100294 |
+
{
|
| 100295 |
+
"epoch": 0.7959444444444445,
|
| 100296 |
+
"grad_norm": 0.09912109375,
|
| 100297 |
+
"learning_rate": 8.261530201316787e-05,
|
| 100298 |
+
"loss": 2.3679,
|
| 100299 |
+
"step": 14327
|
| 100300 |
+
},
|
| 100301 |
+
{
|
| 100302 |
+
"epoch": 0.796,
|
| 100303 |
+
"grad_norm": 0.1005859375,
|
| 100304 |
+
"learning_rate": 8.25719502095633e-05,
|
| 100305 |
+
"loss": 2.3998,
|
| 100306 |
+
"step": 14328
|
| 100307 |
+
},
|
| 100308 |
+
{
|
| 100309 |
+
"epoch": 0.7960555555555555,
|
| 100310 |
+
"grad_norm": 0.1005859375,
|
| 100311 |
+
"learning_rate": 8.252860847406712e-05,
|
| 100312 |
+
"loss": 2.4115,
|
| 100313 |
+
"step": 14329
|
| 100314 |
+
},
|
| 100315 |
+
{
|
| 100316 |
+
"epoch": 0.7961111111111111,
|
| 100317 |
+
"grad_norm": 0.09912109375,
|
| 100318 |
+
"learning_rate": 8.248527680805396e-05,
|
| 100319 |
+
"loss": 2.3352,
|
| 100320 |
+
"step": 14330
|
| 100321 |
+
},
|
| 100322 |
+
{
|
| 100323 |
+
"epoch": 0.7961666666666667,
|
| 100324 |
+
"grad_norm": 0.09912109375,
|
| 100325 |
+
"learning_rate": 8.244195521289837e-05,
|
| 100326 |
+
"loss": 2.2836,
|
| 100327 |
+
"step": 14331
|
| 100328 |
+
},
|
| 100329 |
+
{
|
| 100330 |
+
"epoch": 0.7962222222222223,
|
| 100331 |
+
"grad_norm": 0.09912109375,
|
| 100332 |
+
"learning_rate": 8.239864368997428e-05,
|
| 100333 |
+
"loss": 2.3256,
|
| 100334 |
+
"step": 14332
|
| 100335 |
+
},
|
| 100336 |
+
{
|
| 100337 |
+
"epoch": 0.7962777777777778,
|
| 100338 |
+
"grad_norm": 0.09912109375,
|
| 100339 |
+
"learning_rate": 8.23553422406556e-05,
|
| 100340 |
+
"loss": 2.3598,
|
| 100341 |
+
"step": 14333
|
| 100342 |
+
},
|
| 100343 |
+
{
|
| 100344 |
+
"epoch": 0.7963333333333333,
|
| 100345 |
+
"grad_norm": 0.09716796875,
|
| 100346 |
+
"learning_rate": 8.231205086631555e-05,
|
| 100347 |
+
"loss": 2.3273,
|
| 100348 |
+
"step": 14334
|
| 100349 |
+
},
|
| 100350 |
+
{
|
| 100351 |
+
"epoch": 0.7963888888888889,
|
| 100352 |
+
"grad_norm": 0.09912109375,
|
| 100353 |
+
"learning_rate": 8.226876956832743e-05,
|
| 100354 |
+
"loss": 2.4516,
|
| 100355 |
+
"step": 14335
|
| 100356 |
+
},
|
| 100357 |
+
{
|
| 100358 |
+
"epoch": 0.7964444444444444,
|
| 100359 |
+
"grad_norm": 0.09765625,
|
| 100360 |
+
"learning_rate": 8.222549834806397e-05,
|
| 100361 |
+
"loss": 2.2874,
|
| 100362 |
+
"step": 14336
|
| 100363 |
+
},
|
| 100364 |
+
{
|
| 100365 |
+
"epoch": 0.7965,
|
| 100366 |
+
"grad_norm": 0.0986328125,
|
| 100367 |
+
"learning_rate": 8.218223720689748e-05,
|
| 100368 |
+
"loss": 2.309,
|
| 100369 |
+
"step": 14337
|
| 100370 |
+
},
|
| 100371 |
+
{
|
| 100372 |
+
"epoch": 0.7965555555555556,
|
| 100373 |
+
"grad_norm": 0.09814453125,
|
| 100374 |
+
"learning_rate": 8.213898614620039e-05,
|
| 100375 |
+
"loss": 2.3792,
|
| 100376 |
+
"step": 14338
|
| 100377 |
+
},
|
| 100378 |
+
{
|
| 100379 |
+
"epoch": 0.7966111111111112,
|
| 100380 |
+
"grad_norm": 0.10009765625,
|
| 100381 |
+
"learning_rate": 8.209574516734422e-05,
|
| 100382 |
+
"loss": 2.327,
|
| 100383 |
+
"step": 14339
|
| 100384 |
+
},
|
| 100385 |
+
{
|
| 100386 |
+
"epoch": 0.7966666666666666,
|
| 100387 |
+
"grad_norm": 0.10009765625,
|
| 100388 |
+
"learning_rate": 8.205251427170071e-05,
|
| 100389 |
+
"loss": 2.4236,
|
| 100390 |
+
"step": 14340
|
| 100391 |
+
},
|
| 100392 |
+
{
|
| 100393 |
+
"epoch": 0.7967222222222222,
|
| 100394 |
+
"grad_norm": 0.099609375,
|
| 100395 |
+
"learning_rate": 8.2009293460641e-05,
|
| 100396 |
+
"loss": 2.3646,
|
| 100397 |
+
"step": 14341
|
| 100398 |
+
},
|
| 100399 |
+
{
|
| 100400 |
+
"epoch": 0.7967777777777778,
|
| 100401 |
+
"grad_norm": 0.1005859375,
|
| 100402 |
+
"learning_rate": 8.196608273553584e-05,
|
| 100403 |
+
"loss": 2.4131,
|
| 100404 |
+
"step": 14342
|
| 100405 |
+
},
|
| 100406 |
+
{
|
| 100407 |
+
"epoch": 0.7968333333333333,
|
| 100408 |
+
"grad_norm": 0.0986328125,
|
| 100409 |
+
"learning_rate": 8.192288209775578e-05,
|
| 100410 |
+
"loss": 2.3235,
|
| 100411 |
+
"step": 14343
|
| 100412 |
+
},
|
| 100413 |
+
{
|
| 100414 |
+
"epoch": 0.7968888888888889,
|
| 100415 |
+
"grad_norm": 0.1025390625,
|
| 100416 |
+
"learning_rate": 8.18796915486713e-05,
|
| 100417 |
+
"loss": 2.2815,
|
| 100418 |
+
"step": 14344
|
| 100419 |
+
},
|
| 100420 |
+
{
|
| 100421 |
+
"epoch": 0.7969444444444445,
|
| 100422 |
+
"grad_norm": 0.09912109375,
|
| 100423 |
+
"learning_rate": 8.183651108965213e-05,
|
| 100424 |
+
"loss": 2.3378,
|
| 100425 |
+
"step": 14345
|
| 100426 |
+
},
|
| 100427 |
+
{
|
| 100428 |
+
"epoch": 0.797,
|
| 100429 |
+
"grad_norm": 0.09912109375,
|
| 100430 |
+
"learning_rate": 8.179334072206773e-05,
|
| 100431 |
+
"loss": 2.348,
|
| 100432 |
+
"step": 14346
|
| 100433 |
+
},
|
| 100434 |
+
{
|
| 100435 |
+
"epoch": 0.7970555555555555,
|
| 100436 |
+
"grad_norm": 0.10009765625,
|
| 100437 |
+
"learning_rate": 8.175018044728764e-05,
|
| 100438 |
+
"loss": 2.3617,
|
| 100439 |
+
"step": 14347
|
| 100440 |
+
},
|
| 100441 |
+
{
|
| 100442 |
+
"epoch": 0.7971111111111111,
|
| 100443 |
+
"grad_norm": 0.099609375,
|
| 100444 |
+
"learning_rate": 8.170703026668061e-05,
|
| 100445 |
+
"loss": 2.4015,
|
| 100446 |
+
"step": 14348
|
| 100447 |
+
},
|
| 100448 |
+
{
|
| 100449 |
+
"epoch": 0.7971666666666667,
|
| 100450 |
+
"grad_norm": 0.09765625,
|
| 100451 |
+
"learning_rate": 8.166389018161526e-05,
|
| 100452 |
+
"loss": 2.3555,
|
| 100453 |
+
"step": 14349
|
| 100454 |
+
},
|
| 100455 |
+
{
|
| 100456 |
+
"epoch": 0.7972222222222223,
|
| 100457 |
+
"grad_norm": 0.09912109375,
|
| 100458 |
+
"learning_rate": 8.162076019345999e-05,
|
| 100459 |
+
"loss": 2.3574,
|
| 100460 |
+
"step": 14350
|
| 100461 |
+
},
|
| 100462 |
+
{
|
| 100463 |
+
"epoch": 0.7972777777777778,
|
| 100464 |
+
"grad_norm": 0.10009765625,
|
| 100465 |
+
"learning_rate": 8.157764030358292e-05,
|
| 100466 |
+
"loss": 2.3422,
|
| 100467 |
+
"step": 14351
|
| 100468 |
+
},
|
| 100469 |
+
{
|
| 100470 |
+
"epoch": 0.7973333333333333,
|
| 100471 |
+
"grad_norm": 0.10009765625,
|
| 100472 |
+
"learning_rate": 8.153453051335138e-05,
|
| 100473 |
+
"loss": 2.3079,
|
| 100474 |
+
"step": 14352
|
| 100475 |
+
},
|
| 100476 |
+
{
|
| 100477 |
+
"epoch": 0.7973888888888889,
|
| 100478 |
+
"grad_norm": 0.1015625,
|
| 100479 |
+
"learning_rate": 8.149143082413285e-05,
|
| 100480 |
+
"loss": 2.3567,
|
| 100481 |
+
"step": 14353
|
| 100482 |
+
},
|
| 100483 |
+
{
|
| 100484 |
+
"epoch": 0.7974444444444444,
|
| 100485 |
+
"grad_norm": 0.10009765625,
|
| 100486 |
+
"learning_rate": 8.144834123729452e-05,
|
| 100487 |
+
"loss": 2.3547,
|
| 100488 |
+
"step": 14354
|
| 100489 |
+
},
|
| 100490 |
+
{
|
| 100491 |
+
"epoch": 0.7975,
|
| 100492 |
+
"grad_norm": 0.1015625,
|
| 100493 |
+
"learning_rate": 8.140526175420297e-05,
|
| 100494 |
+
"loss": 2.4198,
|
| 100495 |
+
"step": 14355
|
| 100496 |
+
},
|
| 100497 |
+
{
|
| 100498 |
+
"epoch": 0.7975555555555556,
|
| 100499 |
+
"grad_norm": 0.0986328125,
|
| 100500 |
+
"learning_rate": 8.136219237622453e-05,
|
| 100501 |
+
"loss": 2.3616,
|
| 100502 |
+
"step": 14356
|
| 100503 |
+
},
|
| 100504 |
+
{
|
| 100505 |
+
"epoch": 0.7976111111111112,
|
| 100506 |
+
"grad_norm": 0.09912109375,
|
| 100507 |
+
"learning_rate": 8.131913310472539e-05,
|
| 100508 |
+
"loss": 2.3216,
|
| 100509 |
+
"step": 14357
|
| 100510 |
+
},
|
| 100511 |
+
{
|
| 100512 |
+
"epoch": 0.7976666666666666,
|
| 100513 |
+
"grad_norm": 0.0986328125,
|
| 100514 |
+
"learning_rate": 8.12760839410712e-05,
|
| 100515 |
+
"loss": 2.2851,
|
| 100516 |
+
"step": 14358
|
| 100517 |
+
},
|
| 100518 |
+
{
|
| 100519 |
+
"epoch": 0.7977222222222222,
|
| 100520 |
+
"grad_norm": 0.099609375,
|
| 100521 |
+
"learning_rate": 8.123304488662737e-05,
|
| 100522 |
+
"loss": 2.3451,
|
| 100523 |
+
"step": 14359
|
| 100524 |
+
},
|
| 100525 |
+
{
|
| 100526 |
+
"epoch": 0.7977777777777778,
|
| 100527 |
+
"grad_norm": 0.09912109375,
|
| 100528 |
+
"learning_rate": 8.1190015942759e-05,
|
| 100529 |
+
"loss": 2.333,
|
| 100530 |
+
"step": 14360
|
| 100531 |
+
},
|
| 100532 |
+
{
|
| 100533 |
+
"epoch": 0.7978333333333333,
|
| 100534 |
+
"grad_norm": 0.10205078125,
|
| 100535 |
+
"learning_rate": 8.114699711083113e-05,
|
| 100536 |
+
"loss": 2.4056,
|
| 100537 |
+
"step": 14361
|
| 100538 |
+
},
|
| 100539 |
+
{
|
| 100540 |
+
"epoch": 0.7978888888888889,
|
| 100541 |
+
"grad_norm": 0.0986328125,
|
| 100542 |
+
"learning_rate": 8.110398839220779e-05,
|
| 100543 |
+
"loss": 2.3706,
|
| 100544 |
+
"step": 14362
|
| 100545 |
+
},
|
| 100546 |
+
{
|
| 100547 |
+
"epoch": 0.7979444444444445,
|
| 100548 |
+
"grad_norm": 0.09814453125,
|
| 100549 |
+
"learning_rate": 8.106098978825331e-05,
|
| 100550 |
+
"loss": 2.3223,
|
| 100551 |
+
"step": 14363
|
| 100552 |
+
},
|
| 100553 |
+
{
|
| 100554 |
+
"epoch": 0.798,
|
| 100555 |
+
"grad_norm": 0.099609375,
|
| 100556 |
+
"learning_rate": 8.10180013003317e-05,
|
| 100557 |
+
"loss": 2.3377,
|
| 100558 |
+
"step": 14364
|
| 100559 |
+
},
|
| 100560 |
+
{
|
| 100561 |
+
"epoch": 0.7980555555555555,
|
| 100562 |
+
"grad_norm": 0.09912109375,
|
| 100563 |
+
"learning_rate": 8.097502292980625e-05,
|
| 100564 |
+
"loss": 2.3231,
|
| 100565 |
+
"step": 14365
|
| 100566 |
+
},
|
| 100567 |
+
{
|
| 100568 |
+
"epoch": 0.7981111111111111,
|
| 100569 |
+
"grad_norm": 0.09765625,
|
| 100570 |
+
"learning_rate": 8.093205467804011e-05,
|
| 100571 |
+
"loss": 2.2583,
|
| 100572 |
+
"step": 14366
|
| 100573 |
+
},
|
| 100574 |
+
{
|
| 100575 |
+
"epoch": 0.7981666666666667,
|
| 100576 |
+
"grad_norm": 0.10009765625,
|
| 100577 |
+
"learning_rate": 8.088909654639629e-05,
|
| 100578 |
+
"loss": 2.368,
|
| 100579 |
+
"step": 14367
|
| 100580 |
+
},
|
| 100581 |
+
{
|
| 100582 |
+
"epoch": 0.7982222222222223,
|
| 100583 |
+
"grad_norm": 0.099609375,
|
| 100584 |
+
"learning_rate": 8.084614853623724e-05,
|
| 100585 |
+
"loss": 2.4208,
|
| 100586 |
+
"step": 14368
|
| 100587 |
+
},
|
| 100588 |
+
{
|
| 100589 |
+
"epoch": 0.7982777777777778,
|
| 100590 |
+
"grad_norm": 0.10107421875,
|
| 100591 |
+
"learning_rate": 8.080321064892511e-05,
|
| 100592 |
+
"loss": 2.3859,
|
| 100593 |
+
"step": 14369
|
| 100594 |
+
},
|
| 100595 |
+
{
|
| 100596 |
+
"epoch": 0.7983333333333333,
|
| 100597 |
+
"grad_norm": 0.0986328125,
|
| 100598 |
+
"learning_rate": 8.076028288582183e-05,
|
| 100599 |
+
"loss": 2.2536,
|
| 100600 |
+
"step": 14370
|
| 100601 |
+
},
|
| 100602 |
+
{
|
| 100603 |
+
"epoch": 0.7983888888888889,
|
| 100604 |
+
"grad_norm": 0.0986328125,
|
| 100605 |
+
"learning_rate": 8.07173652482891e-05,
|
| 100606 |
+
"loss": 2.3096,
|
| 100607 |
+
"step": 14371
|
| 100608 |
+
},
|
| 100609 |
+
{
|
| 100610 |
+
"epoch": 0.7984444444444444,
|
| 100611 |
+
"grad_norm": 0.099609375,
|
| 100612 |
+
"learning_rate": 8.067445773768812e-05,
|
| 100613 |
+
"loss": 2.2799,
|
| 100614 |
+
"step": 14372
|
| 100615 |
+
},
|
| 100616 |
+
{
|
| 100617 |
+
"epoch": 0.7985,
|
| 100618 |
+
"grad_norm": 0.10009765625,
|
| 100619 |
+
"learning_rate": 8.063156035537965e-05,
|
| 100620 |
+
"loss": 2.3495,
|
| 100621 |
+
"step": 14373
|
| 100622 |
+
},
|
| 100623 |
+
{
|
| 100624 |
+
"epoch": 0.7985555555555556,
|
| 100625 |
+
"grad_norm": 0.10009765625,
|
| 100626 |
+
"learning_rate": 8.058867310272451e-05,
|
| 100627 |
+
"loss": 2.3945,
|
| 100628 |
+
"step": 14374
|
| 100629 |
+
},
|
| 100630 |
+
{
|
| 100631 |
+
"epoch": 0.7986111111111112,
|
| 100632 |
+
"grad_norm": 0.1005859375,
|
| 100633 |
+
"learning_rate": 8.054579598108296e-05,
|
| 100634 |
+
"loss": 2.3551,
|
| 100635 |
+
"step": 14375
|
| 100636 |
+
},
|
| 100637 |
+
{
|
| 100638 |
+
"epoch": 0.7986666666666666,
|
| 100639 |
+
"grad_norm": 0.1005859375,
|
| 100640 |
+
"learning_rate": 8.050292899181476e-05,
|
| 100641 |
+
"loss": 2.3757,
|
| 100642 |
+
"step": 14376
|
| 100643 |
+
},
|
| 100644 |
+
{
|
| 100645 |
+
"epoch": 0.7987222222222222,
|
| 100646 |
+
"grad_norm": 0.09912109375,
|
| 100647 |
+
"learning_rate": 8.046007213627982e-05,
|
| 100648 |
+
"loss": 2.2986,
|
| 100649 |
+
"step": 14377
|
| 100650 |
+
},
|
| 100651 |
+
{
|
| 100652 |
+
"epoch": 0.7987777777777778,
|
| 100653 |
+
"grad_norm": 0.099609375,
|
| 100654 |
+
"learning_rate": 8.041722541583725e-05,
|
| 100655 |
+
"loss": 2.3009,
|
| 100656 |
+
"step": 14378
|
| 100657 |
+
},
|
| 100658 |
+
{
|
| 100659 |
+
"epoch": 0.7988333333333333,
|
| 100660 |
+
"grad_norm": 0.10009765625,
|
| 100661 |
+
"learning_rate": 8.037438883184626e-05,
|
| 100662 |
+
"loss": 2.3863,
|
| 100663 |
+
"step": 14379
|
| 100664 |
+
},
|
| 100665 |
+
{
|
| 100666 |
+
"epoch": 0.7988888888888889,
|
| 100667 |
+
"grad_norm": 0.1005859375,
|
| 100668 |
+
"learning_rate": 8.033156238566535e-05,
|
| 100669 |
+
"loss": 2.3604,
|
| 100670 |
+
"step": 14380
|
| 100671 |
+
},
|
| 100672 |
+
{
|
| 100673 |
+
"epoch": 0.7989444444444445,
|
| 100674 |
+
"grad_norm": 0.10009765625,
|
| 100675 |
+
"learning_rate": 8.0288746078653e-05,
|
| 100676 |
+
"loss": 2.4016,
|
| 100677 |
+
"step": 14381
|
| 100678 |
+
},
|
| 100679 |
+
{
|
| 100680 |
+
"epoch": 0.799,
|
| 100681 |
+
"grad_norm": 0.10107421875,
|
| 100682 |
+
"learning_rate": 8.024593991216725e-05,
|
| 100683 |
+
"loss": 2.369,
|
| 100684 |
+
"step": 14382
|
| 100685 |
+
},
|
| 100686 |
+
{
|
| 100687 |
+
"epoch": 0.7990555555555555,
|
| 100688 |
+
"grad_norm": 0.0986328125,
|
| 100689 |
+
"learning_rate": 8.020314388756562e-05,
|
| 100690 |
+
"loss": 2.297,
|
| 100691 |
+
"step": 14383
|
| 100692 |
+
},
|
| 100693 |
+
{
|
| 100694 |
+
"epoch": 0.7991111111111111,
|
| 100695 |
+
"grad_norm": 0.099609375,
|
| 100696 |
+
"learning_rate": 8.016035800620581e-05,
|
| 100697 |
+
"loss": 2.3566,
|
| 100698 |
+
"step": 14384
|
| 100699 |
+
},
|
| 100700 |
+
{
|
| 100701 |
+
"epoch": 0.7991666666666667,
|
| 100702 |
+
"grad_norm": 0.0986328125,
|
| 100703 |
+
"learning_rate": 8.011758226944457e-05,
|
| 100704 |
+
"loss": 2.287,
|
| 100705 |
+
"step": 14385
|
| 100706 |
+
},
|
| 100707 |
+
{
|
| 100708 |
+
"epoch": 0.7992222222222222,
|
| 100709 |
+
"grad_norm": 0.0986328125,
|
| 100710 |
+
"learning_rate": 8.007481667863896e-05,
|
| 100711 |
+
"loss": 2.3296,
|
| 100712 |
+
"step": 14386
|
| 100713 |
+
},
|
| 100714 |
+
{
|
| 100715 |
+
"epoch": 0.7992777777777778,
|
| 100716 |
+
"grad_norm": 0.1025390625,
|
| 100717 |
+
"learning_rate": 8.003206123514525e-05,
|
| 100718 |
+
"loss": 2.4299,
|
| 100719 |
+
"step": 14387
|
| 100720 |
+
},
|
| 100721 |
+
{
|
| 100722 |
+
"epoch": 0.7993333333333333,
|
| 100723 |
+
"grad_norm": 0.0986328125,
|
| 100724 |
+
"learning_rate": 7.998931594031942e-05,
|
| 100725 |
+
"loss": 2.3769,
|
| 100726 |
+
"step": 14388
|
| 100727 |
+
},
|
| 100728 |
+
{
|
| 100729 |
+
"epoch": 0.7993888888888889,
|
| 100730 |
+
"grad_norm": 0.10009765625,
|
| 100731 |
+
"learning_rate": 7.994658079551754e-05,
|
| 100732 |
+
"loss": 2.2692,
|
| 100733 |
+
"step": 14389
|
| 100734 |
+
},
|
| 100735 |
+
{
|
| 100736 |
+
"epoch": 0.7994444444444444,
|
| 100737 |
+
"grad_norm": 0.0986328125,
|
| 100738 |
+
"learning_rate": 7.990385580209481e-05,
|
| 100739 |
+
"loss": 2.2824,
|
| 100740 |
+
"step": 14390
|
| 100741 |
+
},
|
| 100742 |
+
{
|
| 100743 |
+
"epoch": 0.7995,
|
| 100744 |
+
"grad_norm": 0.0986328125,
|
| 100745 |
+
"learning_rate": 7.98611409614066e-05,
|
| 100746 |
+
"loss": 2.3314,
|
| 100747 |
+
"step": 14391
|
| 100748 |
+
},
|
| 100749 |
+
{
|
| 100750 |
+
"epoch": 0.7995555555555556,
|
| 100751 |
+
"grad_norm": 0.10009765625,
|
| 100752 |
+
"learning_rate": 7.981843627480751e-05,
|
| 100753 |
+
"loss": 2.4009,
|
| 100754 |
+
"step": 14392
|
| 100755 |
+
},
|
| 100756 |
+
{
|
| 100757 |
+
"epoch": 0.7996111111111112,
|
| 100758 |
+
"grad_norm": 0.099609375,
|
| 100759 |
+
"learning_rate": 7.977574174365226e-05,
|
| 100760 |
+
"loss": 2.3899,
|
| 100761 |
+
"step": 14393
|
| 100762 |
+
},
|
| 100763 |
+
{
|
| 100764 |
+
"epoch": 0.7996666666666666,
|
| 100765 |
+
"grad_norm": 0.10009765625,
|
| 100766 |
+
"learning_rate": 7.973305736929488e-05,
|
| 100767 |
+
"loss": 2.4227,
|
| 100768 |
+
"step": 14394
|
| 100769 |
+
},
|
| 100770 |
+
{
|
| 100771 |
+
"epoch": 0.7997222222222222,
|
| 100772 |
+
"grad_norm": 0.10009765625,
|
| 100773 |
+
"learning_rate": 7.96903831530892e-05,
|
| 100774 |
+
"loss": 2.3197,
|
| 100775 |
+
"step": 14395
|
| 100776 |
+
},
|
| 100777 |
+
{
|
| 100778 |
+
"epoch": 0.7997777777777778,
|
| 100779 |
+
"grad_norm": 0.099609375,
|
| 100780 |
+
"learning_rate": 7.964771909638886e-05,
|
| 100781 |
+
"loss": 2.3228,
|
| 100782 |
+
"step": 14396
|
| 100783 |
+
},
|
| 100784 |
+
{
|
| 100785 |
+
"epoch": 0.7998333333333333,
|
| 100786 |
+
"grad_norm": 0.1005859375,
|
| 100787 |
+
"learning_rate": 7.960506520054706e-05,
|
| 100788 |
+
"loss": 2.3615,
|
| 100789 |
+
"step": 14397
|
| 100790 |
+
},
|
| 100791 |
+
{
|
| 100792 |
+
"epoch": 0.7998888888888889,
|
| 100793 |
+
"grad_norm": 0.09912109375,
|
| 100794 |
+
"learning_rate": 7.956242146691652e-05,
|
| 100795 |
+
"loss": 2.3147,
|
| 100796 |
+
"step": 14398
|
| 100797 |
+
},
|
| 100798 |
+
{
|
| 100799 |
+
"epoch": 0.7999444444444445,
|
| 100800 |
+
"grad_norm": 0.10107421875,
|
| 100801 |
+
"learning_rate": 7.951978789684993e-05,
|
| 100802 |
+
"loss": 2.4137,
|
| 100803 |
+
"step": 14399
|
| 100804 |
+
},
|
| 100805 |
+
{
|
| 100806 |
+
"epoch": 0.8,
|
| 100807 |
+
"grad_norm": 0.10009765625,
|
| 100808 |
+
"learning_rate": 7.947716449169962e-05,
|
| 100809 |
+
"loss": 2.4104,
|
| 100810 |
+
"step": 14400
|
| 100811 |
}
|
| 100812 |
],
|
| 100813 |
"logging_steps": 1,
|
|
|
|
| 100827 |
"attributes": {}
|
| 100828 |
}
|
| 100829 |
},
|
| 100830 |
+
"total_flos": 7.597524373651427e+18,
|
| 100831 |
"train_batch_size": 8,
|
| 100832 |
"trial_name": null,
|
| 100833 |
"trial_params": null
|