Training in progress, step 14700, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000555808
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff5e783eaf25c2380749fecf678eb908fffb3575959cf67db7e741bb5f75f4f5
|
| 3 |
size 1000555808
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1318473087
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5deea5631a4fd7df4829b883c5b7c6bf6d4fa6db780bb569bb25323b958d0942
|
| 3 |
size 1318473087
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1681141190c59875f73b80204bd794e5d41bd6370e6d0f704725cb3853f0d145
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -100808,6 +100808,2106 @@
|
|
| 100808 |
"learning_rate": 7.947716449169962e-05,
|
| 100809 |
"loss": 2.4104,
|
| 100810 |
"step": 14400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100811 |
}
|
| 100812 |
],
|
| 100813 |
"logging_steps": 1,
|
|
@@ -100827,7 +102927,7 @@
|
|
| 100827 |
"attributes": {}
|
| 100828 |
}
|
| 100829 |
},
|
| 100830 |
-
"total_flos": 7.
|
| 100831 |
"train_batch_size": 8,
|
| 100832 |
"trial_name": null,
|
| 100833 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.8166666666666667,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 14700,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 100808 |
"learning_rate": 7.947716449169962e-05,
|
| 100809 |
"loss": 2.4104,
|
| 100810 |
"step": 14400
|
| 100811 |
+
},
|
| 100812 |
+
{
|
| 100813 |
+
"epoch": 0.8000555555555555,
|
| 100814 |
+
"grad_norm": 0.1005859375,
|
| 100815 |
+
"learning_rate": 7.94345512528174e-05,
|
| 100816 |
+
"loss": 2.3538,
|
| 100817 |
+
"step": 14401
|
| 100818 |
+
},
|
| 100819 |
+
{
|
| 100820 |
+
"epoch": 0.8001111111111111,
|
| 100821 |
+
"grad_norm": 0.09912109375,
|
| 100822 |
+
"learning_rate": 7.939194818155478e-05,
|
| 100823 |
+
"loss": 2.3306,
|
| 100824 |
+
"step": 14402
|
| 100825 |
+
},
|
| 100826 |
+
{
|
| 100827 |
+
"epoch": 0.8001666666666667,
|
| 100828 |
+
"grad_norm": 0.10009765625,
|
| 100829 |
+
"learning_rate": 7.934935527926323e-05,
|
| 100830 |
+
"loss": 2.3396,
|
| 100831 |
+
"step": 14403
|
| 100832 |
+
},
|
| 100833 |
+
{
|
| 100834 |
+
"epoch": 0.8002222222222222,
|
| 100835 |
+
"grad_norm": 0.0986328125,
|
| 100836 |
+
"learning_rate": 7.930677254729366e-05,
|
| 100837 |
+
"loss": 2.293,
|
| 100838 |
+
"step": 14404
|
| 100839 |
+
},
|
| 100840 |
+
{
|
| 100841 |
+
"epoch": 0.8002777777777778,
|
| 100842 |
+
"grad_norm": 0.09912109375,
|
| 100843 |
+
"learning_rate": 7.926419998699648e-05,
|
| 100844 |
+
"loss": 2.319,
|
| 100845 |
+
"step": 14405
|
| 100846 |
+
},
|
| 100847 |
+
{
|
| 100848 |
+
"epoch": 0.8003333333333333,
|
| 100849 |
+
"grad_norm": 0.10009765625,
|
| 100850 |
+
"learning_rate": 7.922163759972221e-05,
|
| 100851 |
+
"loss": 2.4561,
|
| 100852 |
+
"step": 14406
|
| 100853 |
+
},
|
| 100854 |
+
{
|
| 100855 |
+
"epoch": 0.8003888888888889,
|
| 100856 |
+
"grad_norm": 0.1025390625,
|
| 100857 |
+
"learning_rate": 7.917908538682097e-05,
|
| 100858 |
+
"loss": 2.4193,
|
| 100859 |
+
"step": 14407
|
| 100860 |
+
},
|
| 100861 |
+
{
|
| 100862 |
+
"epoch": 0.8004444444444444,
|
| 100863 |
+
"grad_norm": 0.09814453125,
|
| 100864 |
+
"learning_rate": 7.913654334964205e-05,
|
| 100865 |
+
"loss": 2.3082,
|
| 100866 |
+
"step": 14408
|
| 100867 |
+
},
|
| 100868 |
+
{
|
| 100869 |
+
"epoch": 0.8005,
|
| 100870 |
+
"grad_norm": 0.09814453125,
|
| 100871 |
+
"learning_rate": 7.909401148953498e-05,
|
| 100872 |
+
"loss": 2.3311,
|
| 100873 |
+
"step": 14409
|
| 100874 |
+
},
|
| 100875 |
+
{
|
| 100876 |
+
"epoch": 0.8005555555555556,
|
| 100877 |
+
"grad_norm": 0.09765625,
|
| 100878 |
+
"learning_rate": 7.905148980784885e-05,
|
| 100879 |
+
"loss": 2.2984,
|
| 100880 |
+
"step": 14410
|
| 100881 |
+
},
|
| 100882 |
+
{
|
| 100883 |
+
"epoch": 0.8006111111111112,
|
| 100884 |
+
"grad_norm": 0.10107421875,
|
| 100885 |
+
"learning_rate": 7.900897830593228e-05,
|
| 100886 |
+
"loss": 2.4004,
|
| 100887 |
+
"step": 14411
|
| 100888 |
+
},
|
| 100889 |
+
{
|
| 100890 |
+
"epoch": 0.8006666666666666,
|
| 100891 |
+
"grad_norm": 0.099609375,
|
| 100892 |
+
"learning_rate": 7.896647698513354e-05,
|
| 100893 |
+
"loss": 2.3758,
|
| 100894 |
+
"step": 14412
|
| 100895 |
+
},
|
| 100896 |
+
{
|
| 100897 |
+
"epoch": 0.8007222222222222,
|
| 100898 |
+
"grad_norm": 0.0986328125,
|
| 100899 |
+
"learning_rate": 7.892398584680072e-05,
|
| 100900 |
+
"loss": 2.3663,
|
| 100901 |
+
"step": 14413
|
| 100902 |
+
},
|
| 100903 |
+
{
|
| 100904 |
+
"epoch": 0.8007777777777778,
|
| 100905 |
+
"grad_norm": 0.1005859375,
|
| 100906 |
+
"learning_rate": 7.888150489228188e-05,
|
| 100907 |
+
"loss": 2.3208,
|
| 100908 |
+
"step": 14414
|
| 100909 |
+
},
|
| 100910 |
+
{
|
| 100911 |
+
"epoch": 0.8008333333333333,
|
| 100912 |
+
"grad_norm": 0.0986328125,
|
| 100913 |
+
"learning_rate": 7.883903412292388e-05,
|
| 100914 |
+
"loss": 2.2747,
|
| 100915 |
+
"step": 14415
|
| 100916 |
+
},
|
| 100917 |
+
{
|
| 100918 |
+
"epoch": 0.8008888888888889,
|
| 100919 |
+
"grad_norm": 0.09814453125,
|
| 100920 |
+
"learning_rate": 7.879657354007406e-05,
|
| 100921 |
+
"loss": 2.2726,
|
| 100922 |
+
"step": 14416
|
| 100923 |
+
},
|
| 100924 |
+
{
|
| 100925 |
+
"epoch": 0.8009444444444445,
|
| 100926 |
+
"grad_norm": 0.10009765625,
|
| 100927 |
+
"learning_rate": 7.875412314507942e-05,
|
| 100928 |
+
"loss": 2.3161,
|
| 100929 |
+
"step": 14417
|
| 100930 |
+
},
|
| 100931 |
+
{
|
| 100932 |
+
"epoch": 0.801,
|
| 100933 |
+
"grad_norm": 0.10009765625,
|
| 100934 |
+
"learning_rate": 7.871168293928585e-05,
|
| 100935 |
+
"loss": 2.4785,
|
| 100936 |
+
"step": 14418
|
| 100937 |
+
},
|
| 100938 |
+
{
|
| 100939 |
+
"epoch": 0.8010555555555555,
|
| 100940 |
+
"grad_norm": 0.10107421875,
|
| 100941 |
+
"learning_rate": 7.866925292403981e-05,
|
| 100942 |
+
"loss": 2.4305,
|
| 100943 |
+
"step": 14419
|
| 100944 |
+
},
|
| 100945 |
+
{
|
| 100946 |
+
"epoch": 0.8011111111111111,
|
| 100947 |
+
"grad_norm": 0.09765625,
|
| 100948 |
+
"learning_rate": 7.862683310068715e-05,
|
| 100949 |
+
"loss": 2.295,
|
| 100950 |
+
"step": 14420
|
| 100951 |
+
},
|
| 100952 |
+
{
|
| 100953 |
+
"epoch": 0.8011666666666667,
|
| 100954 |
+
"grad_norm": 0.0986328125,
|
| 100955 |
+
"learning_rate": 7.858442347057313e-05,
|
| 100956 |
+
"loss": 2.3188,
|
| 100957 |
+
"step": 14421
|
| 100958 |
+
},
|
| 100959 |
+
{
|
| 100960 |
+
"epoch": 0.8012222222222222,
|
| 100961 |
+
"grad_norm": 0.1015625,
|
| 100962 |
+
"learning_rate": 7.854202403504291e-05,
|
| 100963 |
+
"loss": 2.3337,
|
| 100964 |
+
"step": 14422
|
| 100965 |
+
},
|
| 100966 |
+
{
|
| 100967 |
+
"epoch": 0.8012777777777778,
|
| 100968 |
+
"grad_norm": 0.09814453125,
|
| 100969 |
+
"learning_rate": 7.849963479544137e-05,
|
| 100970 |
+
"loss": 2.338,
|
| 100971 |
+
"step": 14423
|
| 100972 |
+
},
|
| 100973 |
+
{
|
| 100974 |
+
"epoch": 0.8013333333333333,
|
| 100975 |
+
"grad_norm": 0.10009765625,
|
| 100976 |
+
"learning_rate": 7.845725575311309e-05,
|
| 100977 |
+
"loss": 2.3416,
|
| 100978 |
+
"step": 14424
|
| 100979 |
+
},
|
| 100980 |
+
{
|
| 100981 |
+
"epoch": 0.8013888888888889,
|
| 100982 |
+
"grad_norm": 0.10107421875,
|
| 100983 |
+
"learning_rate": 7.841488690940213e-05,
|
| 100984 |
+
"loss": 2.3768,
|
| 100985 |
+
"step": 14425
|
| 100986 |
+
},
|
| 100987 |
+
{
|
| 100988 |
+
"epoch": 0.8014444444444444,
|
| 100989 |
+
"grad_norm": 0.09814453125,
|
| 100990 |
+
"learning_rate": 7.837252826565222e-05,
|
| 100991 |
+
"loss": 2.3157,
|
| 100992 |
+
"step": 14426
|
| 100993 |
+
},
|
| 100994 |
+
{
|
| 100995 |
+
"epoch": 0.8015,
|
| 100996 |
+
"grad_norm": 0.10009765625,
|
| 100997 |
+
"learning_rate": 7.833017982320714e-05,
|
| 100998 |
+
"loss": 2.3622,
|
| 100999 |
+
"step": 14427
|
| 101000 |
+
},
|
| 101001 |
+
{
|
| 101002 |
+
"epoch": 0.8015555555555556,
|
| 101003 |
+
"grad_norm": 0.09912109375,
|
| 101004 |
+
"learning_rate": 7.828784158340994e-05,
|
| 101005 |
+
"loss": 2.2799,
|
| 101006 |
+
"step": 14428
|
| 101007 |
+
},
|
| 101008 |
+
{
|
| 101009 |
+
"epoch": 0.8016111111111112,
|
| 101010 |
+
"grad_norm": 0.10009765625,
|
| 101011 |
+
"learning_rate": 7.824551354760342e-05,
|
| 101012 |
+
"loss": 2.3318,
|
| 101013 |
+
"step": 14429
|
| 101014 |
+
},
|
| 101015 |
+
{
|
| 101016 |
+
"epoch": 0.8016666666666666,
|
| 101017 |
+
"grad_norm": 0.10107421875,
|
| 101018 |
+
"learning_rate": 7.820319571713031e-05,
|
| 101019 |
+
"loss": 2.3904,
|
| 101020 |
+
"step": 14430
|
| 101021 |
+
},
|
| 101022 |
+
{
|
| 101023 |
+
"epoch": 0.8017222222222222,
|
| 101024 |
+
"grad_norm": 0.099609375,
|
| 101025 |
+
"learning_rate": 7.816088809333266e-05,
|
| 101026 |
+
"loss": 2.3978,
|
| 101027 |
+
"step": 14431
|
| 101028 |
+
},
|
| 101029 |
+
{
|
| 101030 |
+
"epoch": 0.8017777777777778,
|
| 101031 |
+
"grad_norm": 0.10009765625,
|
| 101032 |
+
"learning_rate": 7.811859067755256e-05,
|
| 101033 |
+
"loss": 2.334,
|
| 101034 |
+
"step": 14432
|
| 101035 |
+
},
|
| 101036 |
+
{
|
| 101037 |
+
"epoch": 0.8018333333333333,
|
| 101038 |
+
"grad_norm": 0.1005859375,
|
| 101039 |
+
"learning_rate": 7.807630347113138e-05,
|
| 101040 |
+
"loss": 2.3308,
|
| 101041 |
+
"step": 14433
|
| 101042 |
+
},
|
| 101043 |
+
{
|
| 101044 |
+
"epoch": 0.8018888888888889,
|
| 101045 |
+
"grad_norm": 0.10009765625,
|
| 101046 |
+
"learning_rate": 7.80340264754106e-05,
|
| 101047 |
+
"loss": 2.3721,
|
| 101048 |
+
"step": 14434
|
| 101049 |
+
},
|
| 101050 |
+
{
|
| 101051 |
+
"epoch": 0.8019444444444445,
|
| 101052 |
+
"grad_norm": 0.0986328125,
|
| 101053 |
+
"learning_rate": 7.7991759691731e-05,
|
| 101054 |
+
"loss": 2.3263,
|
| 101055 |
+
"step": 14435
|
| 101056 |
+
},
|
| 101057 |
+
{
|
| 101058 |
+
"epoch": 0.802,
|
| 101059 |
+
"grad_norm": 0.09814453125,
|
| 101060 |
+
"learning_rate": 7.794950312143318e-05,
|
| 101061 |
+
"loss": 2.3366,
|
| 101062 |
+
"step": 14436
|
| 101063 |
+
},
|
| 101064 |
+
{
|
| 101065 |
+
"epoch": 0.8020555555555555,
|
| 101066 |
+
"grad_norm": 0.10205078125,
|
| 101067 |
+
"learning_rate": 7.790725676585756e-05,
|
| 101068 |
+
"loss": 2.4236,
|
| 101069 |
+
"step": 14437
|
| 101070 |
+
},
|
| 101071 |
+
{
|
| 101072 |
+
"epoch": 0.8021111111111111,
|
| 101073 |
+
"grad_norm": 0.09912109375,
|
| 101074 |
+
"learning_rate": 7.786502062634387e-05,
|
| 101075 |
+
"loss": 2.3188,
|
| 101076 |
+
"step": 14438
|
| 101077 |
+
},
|
| 101078 |
+
{
|
| 101079 |
+
"epoch": 0.8021666666666667,
|
| 101080 |
+
"grad_norm": 0.0986328125,
|
| 101081 |
+
"learning_rate": 7.782279470423199e-05,
|
| 101082 |
+
"loss": 2.2898,
|
| 101083 |
+
"step": 14439
|
| 101084 |
+
},
|
| 101085 |
+
{
|
| 101086 |
+
"epoch": 0.8022222222222222,
|
| 101087 |
+
"grad_norm": 0.10009765625,
|
| 101088 |
+
"learning_rate": 7.778057900086118e-05,
|
| 101089 |
+
"loss": 2.3365,
|
| 101090 |
+
"step": 14440
|
| 101091 |
+
},
|
| 101092 |
+
{
|
| 101093 |
+
"epoch": 0.8022777777777778,
|
| 101094 |
+
"grad_norm": 0.099609375,
|
| 101095 |
+
"learning_rate": 7.773837351757025e-05,
|
| 101096 |
+
"loss": 2.3105,
|
| 101097 |
+
"step": 14441
|
| 101098 |
+
},
|
| 101099 |
+
{
|
| 101100 |
+
"epoch": 0.8023333333333333,
|
| 101101 |
+
"grad_norm": 0.10009765625,
|
| 101102 |
+
"learning_rate": 7.769617825569805e-05,
|
| 101103 |
+
"loss": 2.3361,
|
| 101104 |
+
"step": 14442
|
| 101105 |
+
},
|
| 101106 |
+
{
|
| 101107 |
+
"epoch": 0.8023888888888889,
|
| 101108 |
+
"grad_norm": 0.10009765625,
|
| 101109 |
+
"learning_rate": 7.765399321658283e-05,
|
| 101110 |
+
"loss": 2.4099,
|
| 101111 |
+
"step": 14443
|
| 101112 |
+
},
|
| 101113 |
+
{
|
| 101114 |
+
"epoch": 0.8024444444444444,
|
| 101115 |
+
"grad_norm": 0.09912109375,
|
| 101116 |
+
"learning_rate": 7.761181840156266e-05,
|
| 101117 |
+
"loss": 2.3398,
|
| 101118 |
+
"step": 14444
|
| 101119 |
+
},
|
| 101120 |
+
{
|
| 101121 |
+
"epoch": 0.8025,
|
| 101122 |
+
"grad_norm": 0.10009765625,
|
| 101123 |
+
"learning_rate": 7.756965381197514e-05,
|
| 101124 |
+
"loss": 2.3353,
|
| 101125 |
+
"step": 14445
|
| 101126 |
+
},
|
| 101127 |
+
{
|
| 101128 |
+
"epoch": 0.8025555555555556,
|
| 101129 |
+
"grad_norm": 0.099609375,
|
| 101130 |
+
"learning_rate": 7.75274994491578e-05,
|
| 101131 |
+
"loss": 2.3702,
|
| 101132 |
+
"step": 14446
|
| 101133 |
+
},
|
| 101134 |
+
{
|
| 101135 |
+
"epoch": 0.8026111111111112,
|
| 101136 |
+
"grad_norm": 0.09912109375,
|
| 101137 |
+
"learning_rate": 7.748535531444758e-05,
|
| 101138 |
+
"loss": 2.323,
|
| 101139 |
+
"step": 14447
|
| 101140 |
+
},
|
| 101141 |
+
{
|
| 101142 |
+
"epoch": 0.8026666666666666,
|
| 101143 |
+
"grad_norm": 0.1015625,
|
| 101144 |
+
"learning_rate": 7.744322140918109e-05,
|
| 101145 |
+
"loss": 2.4195,
|
| 101146 |
+
"step": 14448
|
| 101147 |
+
},
|
| 101148 |
+
{
|
| 101149 |
+
"epoch": 0.8027222222222222,
|
| 101150 |
+
"grad_norm": 0.10107421875,
|
| 101151 |
+
"learning_rate": 7.740109773469497e-05,
|
| 101152 |
+
"loss": 2.3982,
|
| 101153 |
+
"step": 14449
|
| 101154 |
+
},
|
| 101155 |
+
{
|
| 101156 |
+
"epoch": 0.8027777777777778,
|
| 101157 |
+
"grad_norm": 0.09716796875,
|
| 101158 |
+
"learning_rate": 7.735898429232511e-05,
|
| 101159 |
+
"loss": 2.2683,
|
| 101160 |
+
"step": 14450
|
| 101161 |
+
},
|
| 101162 |
+
{
|
| 101163 |
+
"epoch": 0.8028333333333333,
|
| 101164 |
+
"grad_norm": 0.099609375,
|
| 101165 |
+
"learning_rate": 7.731688108340725e-05,
|
| 101166 |
+
"loss": 2.2876,
|
| 101167 |
+
"step": 14451
|
| 101168 |
+
},
|
| 101169 |
+
{
|
| 101170 |
+
"epoch": 0.8028888888888889,
|
| 101171 |
+
"grad_norm": 0.10009765625,
|
| 101172 |
+
"learning_rate": 7.727478810927679e-05,
|
| 101173 |
+
"loss": 2.3538,
|
| 101174 |
+
"step": 14452
|
| 101175 |
+
},
|
| 101176 |
+
{
|
| 101177 |
+
"epoch": 0.8029444444444445,
|
| 101178 |
+
"grad_norm": 0.09814453125,
|
| 101179 |
+
"learning_rate": 7.723270537126901e-05,
|
| 101180 |
+
"loss": 2.3443,
|
| 101181 |
+
"step": 14453
|
| 101182 |
+
},
|
| 101183 |
+
{
|
| 101184 |
+
"epoch": 0.803,
|
| 101185 |
+
"grad_norm": 0.10009765625,
|
| 101186 |
+
"learning_rate": 7.71906328707186e-05,
|
| 101187 |
+
"loss": 2.362,
|
| 101188 |
+
"step": 14454
|
| 101189 |
+
},
|
| 101190 |
+
{
|
| 101191 |
+
"epoch": 0.8030555555555555,
|
| 101192 |
+
"grad_norm": 0.09912109375,
|
| 101193 |
+
"learning_rate": 7.714857060895986e-05,
|
| 101194 |
+
"loss": 2.349,
|
| 101195 |
+
"step": 14455
|
| 101196 |
+
},
|
| 101197 |
+
{
|
| 101198 |
+
"epoch": 0.8031111111111111,
|
| 101199 |
+
"grad_norm": 0.09912109375,
|
| 101200 |
+
"learning_rate": 7.710651858732711e-05,
|
| 101201 |
+
"loss": 2.294,
|
| 101202 |
+
"step": 14456
|
| 101203 |
+
},
|
| 101204 |
+
{
|
| 101205 |
+
"epoch": 0.8031666666666667,
|
| 101206 |
+
"grad_norm": 0.099609375,
|
| 101207 |
+
"learning_rate": 7.706447680715405e-05,
|
| 101208 |
+
"loss": 2.3046,
|
| 101209 |
+
"step": 14457
|
| 101210 |
+
},
|
| 101211 |
+
{
|
| 101212 |
+
"epoch": 0.8032222222222222,
|
| 101213 |
+
"grad_norm": 0.10009765625,
|
| 101214 |
+
"learning_rate": 7.702244526977405e-05,
|
| 101215 |
+
"loss": 2.3428,
|
| 101216 |
+
"step": 14458
|
| 101217 |
+
},
|
| 101218 |
+
{
|
| 101219 |
+
"epoch": 0.8032777777777778,
|
| 101220 |
+
"grad_norm": 0.099609375,
|
| 101221 |
+
"learning_rate": 7.698042397652034e-05,
|
| 101222 |
+
"loss": 2.3773,
|
| 101223 |
+
"step": 14459
|
| 101224 |
+
},
|
| 101225 |
+
{
|
| 101226 |
+
"epoch": 0.8033333333333333,
|
| 101227 |
+
"grad_norm": 0.1005859375,
|
| 101228 |
+
"learning_rate": 7.693841292872602e-05,
|
| 101229 |
+
"loss": 2.3597,
|
| 101230 |
+
"step": 14460
|
| 101231 |
+
},
|
| 101232 |
+
{
|
| 101233 |
+
"epoch": 0.8033888888888889,
|
| 101234 |
+
"grad_norm": 0.1015625,
|
| 101235 |
+
"learning_rate": 7.689641212772309e-05,
|
| 101236 |
+
"loss": 2.4082,
|
| 101237 |
+
"step": 14461
|
| 101238 |
+
},
|
| 101239 |
+
{
|
| 101240 |
+
"epoch": 0.8034444444444444,
|
| 101241 |
+
"grad_norm": 0.0986328125,
|
| 101242 |
+
"learning_rate": 7.6854421574844e-05,
|
| 101243 |
+
"loss": 2.3393,
|
| 101244 |
+
"step": 14462
|
| 101245 |
+
},
|
| 101246 |
+
{
|
| 101247 |
+
"epoch": 0.8035,
|
| 101248 |
+
"grad_norm": 0.0986328125,
|
| 101249 |
+
"learning_rate": 7.681244127142062e-05,
|
| 101250 |
+
"loss": 2.2793,
|
| 101251 |
+
"step": 14463
|
| 101252 |
+
},
|
| 101253 |
+
{
|
| 101254 |
+
"epoch": 0.8035555555555556,
|
| 101255 |
+
"grad_norm": 0.09912109375,
|
| 101256 |
+
"learning_rate": 7.677047121878445e-05,
|
| 101257 |
+
"loss": 2.2809,
|
| 101258 |
+
"step": 14464
|
| 101259 |
+
},
|
| 101260 |
+
{
|
| 101261 |
+
"epoch": 0.8036111111111112,
|
| 101262 |
+
"grad_norm": 0.10205078125,
|
| 101263 |
+
"learning_rate": 7.672851141826649e-05,
|
| 101264 |
+
"loss": 2.4059,
|
| 101265 |
+
"step": 14465
|
| 101266 |
+
},
|
| 101267 |
+
{
|
| 101268 |
+
"epoch": 0.8036666666666666,
|
| 101269 |
+
"grad_norm": 0.099609375,
|
| 101270 |
+
"learning_rate": 7.668656187119791e-05,
|
| 101271 |
+
"loss": 2.3372,
|
| 101272 |
+
"step": 14466
|
| 101273 |
+
},
|
| 101274 |
+
{
|
| 101275 |
+
"epoch": 0.8037222222222222,
|
| 101276 |
+
"grad_norm": 0.1005859375,
|
| 101277 |
+
"learning_rate": 7.664462257890911e-05,
|
| 101278 |
+
"loss": 2.3644,
|
| 101279 |
+
"step": 14467
|
| 101280 |
+
},
|
| 101281 |
+
{
|
| 101282 |
+
"epoch": 0.8037777777777778,
|
| 101283 |
+
"grad_norm": 0.09912109375,
|
| 101284 |
+
"learning_rate": 7.660269354273024e-05,
|
| 101285 |
+
"loss": 2.2395,
|
| 101286 |
+
"step": 14468
|
| 101287 |
+
},
|
| 101288 |
+
{
|
| 101289 |
+
"epoch": 0.8038333333333333,
|
| 101290 |
+
"grad_norm": 0.0986328125,
|
| 101291 |
+
"learning_rate": 7.656077476399124e-05,
|
| 101292 |
+
"loss": 2.3003,
|
| 101293 |
+
"step": 14469
|
| 101294 |
+
},
|
| 101295 |
+
{
|
| 101296 |
+
"epoch": 0.8038888888888889,
|
| 101297 |
+
"grad_norm": 0.09912109375,
|
| 101298 |
+
"learning_rate": 7.65188662440219e-05,
|
| 101299 |
+
"loss": 2.3435,
|
| 101300 |
+
"step": 14470
|
| 101301 |
+
},
|
| 101302 |
+
{
|
| 101303 |
+
"epoch": 0.8039444444444445,
|
| 101304 |
+
"grad_norm": 0.10009765625,
|
| 101305 |
+
"learning_rate": 7.647696798415109e-05,
|
| 101306 |
+
"loss": 2.3933,
|
| 101307 |
+
"step": 14471
|
| 101308 |
+
},
|
| 101309 |
+
{
|
| 101310 |
+
"epoch": 0.804,
|
| 101311 |
+
"grad_norm": 0.1015625,
|
| 101312 |
+
"learning_rate": 7.643507998570791e-05,
|
| 101313 |
+
"loss": 2.3933,
|
| 101314 |
+
"step": 14472
|
| 101315 |
+
},
|
| 101316 |
+
{
|
| 101317 |
+
"epoch": 0.8040555555555555,
|
| 101318 |
+
"grad_norm": 0.1005859375,
|
| 101319 |
+
"learning_rate": 7.639320225002106e-05,
|
| 101320 |
+
"loss": 2.3515,
|
| 101321 |
+
"step": 14473
|
| 101322 |
+
},
|
| 101323 |
+
{
|
| 101324 |
+
"epoch": 0.8041111111111111,
|
| 101325 |
+
"grad_norm": 0.09912109375,
|
| 101326 |
+
"learning_rate": 7.635133477841869e-05,
|
| 101327 |
+
"loss": 2.3411,
|
| 101328 |
+
"step": 14474
|
| 101329 |
+
},
|
| 101330 |
+
{
|
| 101331 |
+
"epoch": 0.8041666666666667,
|
| 101332 |
+
"grad_norm": 0.09814453125,
|
| 101333 |
+
"learning_rate": 7.630947757222866e-05,
|
| 101334 |
+
"loss": 2.3484,
|
| 101335 |
+
"step": 14475
|
| 101336 |
+
},
|
| 101337 |
+
{
|
| 101338 |
+
"epoch": 0.8042222222222222,
|
| 101339 |
+
"grad_norm": 0.10009765625,
|
| 101340 |
+
"learning_rate": 7.626763063277885e-05,
|
| 101341 |
+
"loss": 2.3417,
|
| 101342 |
+
"step": 14476
|
| 101343 |
+
},
|
| 101344 |
+
{
|
| 101345 |
+
"epoch": 0.8042777777777778,
|
| 101346 |
+
"grad_norm": 0.10107421875,
|
| 101347 |
+
"learning_rate": 7.622579396139631e-05,
|
| 101348 |
+
"loss": 2.3328,
|
| 101349 |
+
"step": 14477
|
| 101350 |
+
},
|
| 101351 |
+
{
|
| 101352 |
+
"epoch": 0.8043333333333333,
|
| 101353 |
+
"grad_norm": 0.1005859375,
|
| 101354 |
+
"learning_rate": 7.6183967559408e-05,
|
| 101355 |
+
"loss": 2.4416,
|
| 101356 |
+
"step": 14478
|
| 101357 |
+
},
|
| 101358 |
+
{
|
| 101359 |
+
"epoch": 0.8043888888888889,
|
| 101360 |
+
"grad_norm": 0.1005859375,
|
| 101361 |
+
"learning_rate": 7.614215142814063e-05,
|
| 101362 |
+
"loss": 2.4193,
|
| 101363 |
+
"step": 14479
|
| 101364 |
+
},
|
| 101365 |
+
{
|
| 101366 |
+
"epoch": 0.8044444444444444,
|
| 101367 |
+
"grad_norm": 0.09912109375,
|
| 101368 |
+
"learning_rate": 7.610034556892065e-05,
|
| 101369 |
+
"loss": 2.3421,
|
| 101370 |
+
"step": 14480
|
| 101371 |
+
},
|
| 101372 |
+
{
|
| 101373 |
+
"epoch": 0.8045,
|
| 101374 |
+
"grad_norm": 0.0986328125,
|
| 101375 |
+
"learning_rate": 7.605854998307393e-05,
|
| 101376 |
+
"loss": 2.2982,
|
| 101377 |
+
"step": 14481
|
| 101378 |
+
},
|
| 101379 |
+
{
|
| 101380 |
+
"epoch": 0.8045555555555556,
|
| 101381 |
+
"grad_norm": 0.09912109375,
|
| 101382 |
+
"learning_rate": 7.6016764671926e-05,
|
| 101383 |
+
"loss": 2.3244,
|
| 101384 |
+
"step": 14482
|
| 101385 |
+
},
|
| 101386 |
+
{
|
| 101387 |
+
"epoch": 0.8046111111111112,
|
| 101388 |
+
"grad_norm": 0.099609375,
|
| 101389 |
+
"learning_rate": 7.597498963680245e-05,
|
| 101390 |
+
"loss": 2.4341,
|
| 101391 |
+
"step": 14483
|
| 101392 |
+
},
|
| 101393 |
+
{
|
| 101394 |
+
"epoch": 0.8046666666666666,
|
| 101395 |
+
"grad_norm": 0.09814453125,
|
| 101396 |
+
"learning_rate": 7.593322487902819e-05,
|
| 101397 |
+
"loss": 2.3174,
|
| 101398 |
+
"step": 14484
|
| 101399 |
+
},
|
| 101400 |
+
{
|
| 101401 |
+
"epoch": 0.8047222222222222,
|
| 101402 |
+
"grad_norm": 0.1005859375,
|
| 101403 |
+
"learning_rate": 7.589147039992775e-05,
|
| 101404 |
+
"loss": 2.3816,
|
| 101405 |
+
"step": 14485
|
| 101406 |
+
},
|
| 101407 |
+
{
|
| 101408 |
+
"epoch": 0.8047777777777778,
|
| 101409 |
+
"grad_norm": 0.09814453125,
|
| 101410 |
+
"learning_rate": 7.584972620082571e-05,
|
| 101411 |
+
"loss": 2.2999,
|
| 101412 |
+
"step": 14486
|
| 101413 |
+
},
|
| 101414 |
+
{
|
| 101415 |
+
"epoch": 0.8048333333333333,
|
| 101416 |
+
"grad_norm": 0.099609375,
|
| 101417 |
+
"learning_rate": 7.580799228304592e-05,
|
| 101418 |
+
"loss": 2.2914,
|
| 101419 |
+
"step": 14487
|
| 101420 |
+
},
|
| 101421 |
+
{
|
| 101422 |
+
"epoch": 0.8048888888888889,
|
| 101423 |
+
"grad_norm": 0.09912109375,
|
| 101424 |
+
"learning_rate": 7.576626864791223e-05,
|
| 101425 |
+
"loss": 2.2898,
|
| 101426 |
+
"step": 14488
|
| 101427 |
+
},
|
| 101428 |
+
{
|
| 101429 |
+
"epoch": 0.8049444444444445,
|
| 101430 |
+
"grad_norm": 0.09912109375,
|
| 101431 |
+
"learning_rate": 7.572455529674791e-05,
|
| 101432 |
+
"loss": 2.3218,
|
| 101433 |
+
"step": 14489
|
| 101434 |
+
},
|
| 101435 |
+
{
|
| 101436 |
+
"epoch": 0.805,
|
| 101437 |
+
"grad_norm": 0.099609375,
|
| 101438 |
+
"learning_rate": 7.568285223087621e-05,
|
| 101439 |
+
"loss": 2.3513,
|
| 101440 |
+
"step": 14490
|
| 101441 |
+
},
|
| 101442 |
+
{
|
| 101443 |
+
"epoch": 0.8050555555555555,
|
| 101444 |
+
"grad_norm": 0.1005859375,
|
| 101445 |
+
"learning_rate": 7.564115945161968e-05,
|
| 101446 |
+
"loss": 2.4135,
|
| 101447 |
+
"step": 14491
|
| 101448 |
+
},
|
| 101449 |
+
{
|
| 101450 |
+
"epoch": 0.8051111111111111,
|
| 101451 |
+
"grad_norm": 0.09912109375,
|
| 101452 |
+
"learning_rate": 7.559947696030065e-05,
|
| 101453 |
+
"loss": 2.304,
|
| 101454 |
+
"step": 14492
|
| 101455 |
+
},
|
| 101456 |
+
{
|
| 101457 |
+
"epoch": 0.8051666666666667,
|
| 101458 |
+
"grad_norm": 0.09814453125,
|
| 101459 |
+
"learning_rate": 7.555780475824143e-05,
|
| 101460 |
+
"loss": 2.3222,
|
| 101461 |
+
"step": 14493
|
| 101462 |
+
},
|
| 101463 |
+
{
|
| 101464 |
+
"epoch": 0.8052222222222222,
|
| 101465 |
+
"grad_norm": 0.09912109375,
|
| 101466 |
+
"learning_rate": 7.551614284676358e-05,
|
| 101467 |
+
"loss": 2.3268,
|
| 101468 |
+
"step": 14494
|
| 101469 |
+
},
|
| 101470 |
+
{
|
| 101471 |
+
"epoch": 0.8052777777777778,
|
| 101472 |
+
"grad_norm": 0.099609375,
|
| 101473 |
+
"learning_rate": 7.547449122718862e-05,
|
| 101474 |
+
"loss": 2.3636,
|
| 101475 |
+
"step": 14495
|
| 101476 |
+
},
|
| 101477 |
+
{
|
| 101478 |
+
"epoch": 0.8053333333333333,
|
| 101479 |
+
"grad_norm": 0.10009765625,
|
| 101480 |
+
"learning_rate": 7.543284990083766e-05,
|
| 101481 |
+
"loss": 2.3899,
|
| 101482 |
+
"step": 14496
|
| 101483 |
+
},
|
| 101484 |
+
{
|
| 101485 |
+
"epoch": 0.8053888888888889,
|
| 101486 |
+
"grad_norm": 0.099609375,
|
| 101487 |
+
"learning_rate": 7.539121886903133e-05,
|
| 101488 |
+
"loss": 2.3613,
|
| 101489 |
+
"step": 14497
|
| 101490 |
+
},
|
| 101491 |
+
{
|
| 101492 |
+
"epoch": 0.8054444444444444,
|
| 101493 |
+
"grad_norm": 0.09765625,
|
| 101494 |
+
"learning_rate": 7.534959813309015e-05,
|
| 101495 |
+
"loss": 2.2814,
|
| 101496 |
+
"step": 14498
|
| 101497 |
+
},
|
| 101498 |
+
{
|
| 101499 |
+
"epoch": 0.8055,
|
| 101500 |
+
"grad_norm": 0.0986328125,
|
| 101501 |
+
"learning_rate": 7.530798769433438e-05,
|
| 101502 |
+
"loss": 2.3062,
|
| 101503 |
+
"step": 14499
|
| 101504 |
+
},
|
| 101505 |
+
{
|
| 101506 |
+
"epoch": 0.8055555555555556,
|
| 101507 |
+
"grad_norm": 0.0986328125,
|
| 101508 |
+
"learning_rate": 7.526638755408364e-05,
|
| 101509 |
+
"loss": 2.328,
|
| 101510 |
+
"step": 14500
|
| 101511 |
+
},
|
| 101512 |
+
{
|
| 101513 |
+
"epoch": 0.8056111111111111,
|
| 101514 |
+
"grad_norm": 0.0986328125,
|
| 101515 |
+
"learning_rate": 7.52247977136574e-05,
|
| 101516 |
+
"loss": 2.3265,
|
| 101517 |
+
"step": 14501
|
| 101518 |
+
},
|
| 101519 |
+
{
|
| 101520 |
+
"epoch": 0.8056666666666666,
|
| 101521 |
+
"grad_norm": 0.10107421875,
|
| 101522 |
+
"learning_rate": 7.518321817437488e-05,
|
| 101523 |
+
"loss": 2.3939,
|
| 101524 |
+
"step": 14502
|
| 101525 |
+
},
|
| 101526 |
+
{
|
| 101527 |
+
"epoch": 0.8057222222222222,
|
| 101528 |
+
"grad_norm": 0.111328125,
|
| 101529 |
+
"learning_rate": 7.514164893755489e-05,
|
| 101530 |
+
"loss": 2.3903,
|
| 101531 |
+
"step": 14503
|
| 101532 |
+
},
|
| 101533 |
+
{
|
| 101534 |
+
"epoch": 0.8057777777777778,
|
| 101535 |
+
"grad_norm": 0.09912109375,
|
| 101536 |
+
"learning_rate": 7.510009000451574e-05,
|
| 101537 |
+
"loss": 2.3167,
|
| 101538 |
+
"step": 14504
|
| 101539 |
+
},
|
| 101540 |
+
{
|
| 101541 |
+
"epoch": 0.8058333333333333,
|
| 101542 |
+
"grad_norm": 0.099609375,
|
| 101543 |
+
"learning_rate": 7.505854137657573e-05,
|
| 101544 |
+
"loss": 2.2728,
|
| 101545 |
+
"step": 14505
|
| 101546 |
+
},
|
| 101547 |
+
{
|
| 101548 |
+
"epoch": 0.8058888888888889,
|
| 101549 |
+
"grad_norm": 0.10009765625,
|
| 101550 |
+
"learning_rate": 7.501700305505286e-05,
|
| 101551 |
+
"loss": 2.3486,
|
| 101552 |
+
"step": 14506
|
| 101553 |
+
},
|
| 101554 |
+
{
|
| 101555 |
+
"epoch": 0.8059444444444445,
|
| 101556 |
+
"grad_norm": 0.099609375,
|
| 101557 |
+
"learning_rate": 7.497547504126429e-05,
|
| 101558 |
+
"loss": 2.3699,
|
| 101559 |
+
"step": 14507
|
| 101560 |
+
},
|
| 101561 |
+
{
|
| 101562 |
+
"epoch": 0.806,
|
| 101563 |
+
"grad_norm": 0.099609375,
|
| 101564 |
+
"learning_rate": 7.493395733652731e-05,
|
| 101565 |
+
"loss": 2.2884,
|
| 101566 |
+
"step": 14508
|
| 101567 |
+
},
|
| 101568 |
+
{
|
| 101569 |
+
"epoch": 0.8060555555555555,
|
| 101570 |
+
"grad_norm": 0.1005859375,
|
| 101571 |
+
"learning_rate": 7.489244994215896e-05,
|
| 101572 |
+
"loss": 2.3317,
|
| 101573 |
+
"step": 14509
|
| 101574 |
+
},
|
| 101575 |
+
{
|
| 101576 |
+
"epoch": 0.8061111111111111,
|
| 101577 |
+
"grad_norm": 0.10009765625,
|
| 101578 |
+
"learning_rate": 7.485095285947558e-05,
|
| 101579 |
+
"loss": 2.3776,
|
| 101580 |
+
"step": 14510
|
| 101581 |
+
},
|
| 101582 |
+
{
|
| 101583 |
+
"epoch": 0.8061666666666667,
|
| 101584 |
+
"grad_norm": 0.09765625,
|
| 101585 |
+
"learning_rate": 7.480946608979329e-05,
|
| 101586 |
+
"loss": 2.3255,
|
| 101587 |
+
"step": 14511
|
| 101588 |
+
},
|
| 101589 |
+
{
|
| 101590 |
+
"epoch": 0.8062222222222222,
|
| 101591 |
+
"grad_norm": 0.099609375,
|
| 101592 |
+
"learning_rate": 7.476798963442817e-05,
|
| 101593 |
+
"loss": 2.2406,
|
| 101594 |
+
"step": 14512
|
| 101595 |
+
},
|
| 101596 |
+
{
|
| 101597 |
+
"epoch": 0.8062777777777778,
|
| 101598 |
+
"grad_norm": 0.09765625,
|
| 101599 |
+
"learning_rate": 7.472652349469571e-05,
|
| 101600 |
+
"loss": 2.3366,
|
| 101601 |
+
"step": 14513
|
| 101602 |
+
},
|
| 101603 |
+
{
|
| 101604 |
+
"epoch": 0.8063333333333333,
|
| 101605 |
+
"grad_norm": 0.1005859375,
|
| 101606 |
+
"learning_rate": 7.46850676719109e-05,
|
| 101607 |
+
"loss": 2.4019,
|
| 101608 |
+
"step": 14514
|
| 101609 |
+
},
|
| 101610 |
+
{
|
| 101611 |
+
"epoch": 0.8063888888888889,
|
| 101612 |
+
"grad_norm": 0.09814453125,
|
| 101613 |
+
"learning_rate": 7.464362216738883e-05,
|
| 101614 |
+
"loss": 2.3558,
|
| 101615 |
+
"step": 14515
|
| 101616 |
+
},
|
| 101617 |
+
{
|
| 101618 |
+
"epoch": 0.8064444444444444,
|
| 101619 |
+
"grad_norm": 0.10009765625,
|
| 101620 |
+
"learning_rate": 7.460218698244426e-05,
|
| 101621 |
+
"loss": 2.3209,
|
| 101622 |
+
"step": 14516
|
| 101623 |
+
},
|
| 101624 |
+
{
|
| 101625 |
+
"epoch": 0.8065,
|
| 101626 |
+
"grad_norm": 0.09912109375,
|
| 101627 |
+
"learning_rate": 7.456076211839094e-05,
|
| 101628 |
+
"loss": 2.332,
|
| 101629 |
+
"step": 14517
|
| 101630 |
+
},
|
| 101631 |
+
{
|
| 101632 |
+
"epoch": 0.8065555555555556,
|
| 101633 |
+
"grad_norm": 0.10009765625,
|
| 101634 |
+
"learning_rate": 7.451934757654302e-05,
|
| 101635 |
+
"loss": 2.3643,
|
| 101636 |
+
"step": 14518
|
| 101637 |
+
},
|
| 101638 |
+
{
|
| 101639 |
+
"epoch": 0.8066111111111111,
|
| 101640 |
+
"grad_norm": 0.09912109375,
|
| 101641 |
+
"learning_rate": 7.44779433582142e-05,
|
| 101642 |
+
"loss": 2.3608,
|
| 101643 |
+
"step": 14519
|
| 101644 |
+
},
|
| 101645 |
+
{
|
| 101646 |
+
"epoch": 0.8066666666666666,
|
| 101647 |
+
"grad_norm": 0.10107421875,
|
| 101648 |
+
"learning_rate": 7.443654946471763e-05,
|
| 101649 |
+
"loss": 2.3917,
|
| 101650 |
+
"step": 14520
|
| 101651 |
+
},
|
| 101652 |
+
{
|
| 101653 |
+
"epoch": 0.8067222222222222,
|
| 101654 |
+
"grad_norm": 0.10009765625,
|
| 101655 |
+
"learning_rate": 7.439516589736606e-05,
|
| 101656 |
+
"loss": 2.3585,
|
| 101657 |
+
"step": 14521
|
| 101658 |
+
},
|
| 101659 |
+
{
|
| 101660 |
+
"epoch": 0.8067777777777778,
|
| 101661 |
+
"grad_norm": 0.0986328125,
|
| 101662 |
+
"learning_rate": 7.435379265747236e-05,
|
| 101663 |
+
"loss": 2.3134,
|
| 101664 |
+
"step": 14522
|
| 101665 |
+
},
|
| 101666 |
+
{
|
| 101667 |
+
"epoch": 0.8068333333333333,
|
| 101668 |
+
"grad_norm": 0.09912109375,
|
| 101669 |
+
"learning_rate": 7.431242974634867e-05,
|
| 101670 |
+
"loss": 2.2831,
|
| 101671 |
+
"step": 14523
|
| 101672 |
+
},
|
| 101673 |
+
{
|
| 101674 |
+
"epoch": 0.8068888888888889,
|
| 101675 |
+
"grad_norm": 0.1015625,
|
| 101676 |
+
"learning_rate": 7.427107716530684e-05,
|
| 101677 |
+
"loss": 2.3366,
|
| 101678 |
+
"step": 14524
|
| 101679 |
+
},
|
| 101680 |
+
{
|
| 101681 |
+
"epoch": 0.8069444444444445,
|
| 101682 |
+
"grad_norm": 0.09814453125,
|
| 101683 |
+
"learning_rate": 7.422973491565852e-05,
|
| 101684 |
+
"loss": 2.3177,
|
| 101685 |
+
"step": 14525
|
| 101686 |
+
},
|
| 101687 |
+
{
|
| 101688 |
+
"epoch": 0.807,
|
| 101689 |
+
"grad_norm": 0.09814453125,
|
| 101690 |
+
"learning_rate": 7.418840299871521e-05,
|
| 101691 |
+
"loss": 2.3494,
|
| 101692 |
+
"step": 14526
|
| 101693 |
+
},
|
| 101694 |
+
{
|
| 101695 |
+
"epoch": 0.8070555555555555,
|
| 101696 |
+
"grad_norm": 0.1015625,
|
| 101697 |
+
"learning_rate": 7.414708141578768e-05,
|
| 101698 |
+
"loss": 2.3936,
|
| 101699 |
+
"step": 14527
|
| 101700 |
+
},
|
| 101701 |
+
{
|
| 101702 |
+
"epoch": 0.8071111111111111,
|
| 101703 |
+
"grad_norm": 0.10009765625,
|
| 101704 |
+
"learning_rate": 7.410577016818648e-05,
|
| 101705 |
+
"loss": 2.411,
|
| 101706 |
+
"step": 14528
|
| 101707 |
+
},
|
| 101708 |
+
{
|
| 101709 |
+
"epoch": 0.8071666666666667,
|
| 101710 |
+
"grad_norm": 0.099609375,
|
| 101711 |
+
"learning_rate": 7.40644692572221e-05,
|
| 101712 |
+
"loss": 2.3864,
|
| 101713 |
+
"step": 14529
|
| 101714 |
+
},
|
| 101715 |
+
{
|
| 101716 |
+
"epoch": 0.8072222222222222,
|
| 101717 |
+
"grad_norm": 0.09912109375,
|
| 101718 |
+
"learning_rate": 7.402317868420442e-05,
|
| 101719 |
+
"loss": 2.3339,
|
| 101720 |
+
"step": 14530
|
| 101721 |
+
},
|
| 101722 |
+
{
|
| 101723 |
+
"epoch": 0.8072777777777778,
|
| 101724 |
+
"grad_norm": 0.09912109375,
|
| 101725 |
+
"learning_rate": 7.398189845044297e-05,
|
| 101726 |
+
"loss": 2.2965,
|
| 101727 |
+
"step": 14531
|
| 101728 |
+
},
|
| 101729 |
+
{
|
| 101730 |
+
"epoch": 0.8073333333333333,
|
| 101731 |
+
"grad_norm": 0.10009765625,
|
| 101732 |
+
"learning_rate": 7.394062855724731e-05,
|
| 101733 |
+
"loss": 2.3592,
|
| 101734 |
+
"step": 14532
|
| 101735 |
+
},
|
| 101736 |
+
{
|
| 101737 |
+
"epoch": 0.8073888888888889,
|
| 101738 |
+
"grad_norm": 0.10009765625,
|
| 101739 |
+
"learning_rate": 7.38993690059262e-05,
|
| 101740 |
+
"loss": 2.3612,
|
| 101741 |
+
"step": 14533
|
| 101742 |
+
},
|
| 101743 |
+
{
|
| 101744 |
+
"epoch": 0.8074444444444444,
|
| 101745 |
+
"grad_norm": 0.10009765625,
|
| 101746 |
+
"learning_rate": 7.385811979778852e-05,
|
| 101747 |
+
"loss": 2.4096,
|
| 101748 |
+
"step": 14534
|
| 101749 |
+
},
|
| 101750 |
+
{
|
| 101751 |
+
"epoch": 0.8075,
|
| 101752 |
+
"grad_norm": 0.09912109375,
|
| 101753 |
+
"learning_rate": 7.381688093414236e-05,
|
| 101754 |
+
"loss": 2.2956,
|
| 101755 |
+
"step": 14535
|
| 101756 |
+
},
|
| 101757 |
+
{
|
| 101758 |
+
"epoch": 0.8075555555555556,
|
| 101759 |
+
"grad_norm": 0.09814453125,
|
| 101760 |
+
"learning_rate": 7.377565241629593e-05,
|
| 101761 |
+
"loss": 2.2402,
|
| 101762 |
+
"step": 14536
|
| 101763 |
+
},
|
| 101764 |
+
{
|
| 101765 |
+
"epoch": 0.8076111111111111,
|
| 101766 |
+
"grad_norm": 0.099609375,
|
| 101767 |
+
"learning_rate": 7.373443424555691e-05,
|
| 101768 |
+
"loss": 2.3239,
|
| 101769 |
+
"step": 14537
|
| 101770 |
+
},
|
| 101771 |
+
{
|
| 101772 |
+
"epoch": 0.8076666666666666,
|
| 101773 |
+
"grad_norm": 0.09912109375,
|
| 101774 |
+
"learning_rate": 7.369322642323239e-05,
|
| 101775 |
+
"loss": 2.3736,
|
| 101776 |
+
"step": 14538
|
| 101777 |
+
},
|
| 101778 |
+
{
|
| 101779 |
+
"epoch": 0.8077222222222222,
|
| 101780 |
+
"grad_norm": 0.1015625,
|
| 101781 |
+
"learning_rate": 7.365202895062969e-05,
|
| 101782 |
+
"loss": 2.4267,
|
| 101783 |
+
"step": 14539
|
| 101784 |
+
},
|
| 101785 |
+
{
|
| 101786 |
+
"epoch": 0.8077777777777778,
|
| 101787 |
+
"grad_norm": 0.09912109375,
|
| 101788 |
+
"learning_rate": 7.361084182905527e-05,
|
| 101789 |
+
"loss": 2.3517,
|
| 101790 |
+
"step": 14540
|
| 101791 |
+
},
|
| 101792 |
+
{
|
| 101793 |
+
"epoch": 0.8078333333333333,
|
| 101794 |
+
"grad_norm": 0.0986328125,
|
| 101795 |
+
"learning_rate": 7.35696650598157e-05,
|
| 101796 |
+
"loss": 2.2846,
|
| 101797 |
+
"step": 14541
|
| 101798 |
+
},
|
| 101799 |
+
{
|
| 101800 |
+
"epoch": 0.8078888888888889,
|
| 101801 |
+
"grad_norm": 0.09912109375,
|
| 101802 |
+
"learning_rate": 7.352849864421693e-05,
|
| 101803 |
+
"loss": 2.3036,
|
| 101804 |
+
"step": 14542
|
| 101805 |
+
},
|
| 101806 |
+
{
|
| 101807 |
+
"epoch": 0.8079444444444445,
|
| 101808 |
+
"grad_norm": 0.0986328125,
|
| 101809 |
+
"learning_rate": 7.348734258356454e-05,
|
| 101810 |
+
"loss": 2.2497,
|
| 101811 |
+
"step": 14543
|
| 101812 |
+
},
|
| 101813 |
+
{
|
| 101814 |
+
"epoch": 0.808,
|
| 101815 |
+
"grad_norm": 0.10009765625,
|
| 101816 |
+
"learning_rate": 7.344619687916412e-05,
|
| 101817 |
+
"loss": 2.4158,
|
| 101818 |
+
"step": 14544
|
| 101819 |
+
},
|
| 101820 |
+
{
|
| 101821 |
+
"epoch": 0.8080555555555555,
|
| 101822 |
+
"grad_norm": 0.10009765625,
|
| 101823 |
+
"learning_rate": 7.340506153232052e-05,
|
| 101824 |
+
"loss": 2.4302,
|
| 101825 |
+
"step": 14545
|
| 101826 |
+
},
|
| 101827 |
+
{
|
| 101828 |
+
"epoch": 0.8081111111111111,
|
| 101829 |
+
"grad_norm": 0.1005859375,
|
| 101830 |
+
"learning_rate": 7.336393654433869e-05,
|
| 101831 |
+
"loss": 2.4032,
|
| 101832 |
+
"step": 14546
|
| 101833 |
+
},
|
| 101834 |
+
{
|
| 101835 |
+
"epoch": 0.8081666666666667,
|
| 101836 |
+
"grad_norm": 0.09912109375,
|
| 101837 |
+
"learning_rate": 7.332282191652273e-05,
|
| 101838 |
+
"loss": 2.306,
|
| 101839 |
+
"step": 14547
|
| 101840 |
+
},
|
| 101841 |
+
{
|
| 101842 |
+
"epoch": 0.8082222222222222,
|
| 101843 |
+
"grad_norm": 0.099609375,
|
| 101844 |
+
"learning_rate": 7.328171765017704e-05,
|
| 101845 |
+
"loss": 2.2802,
|
| 101846 |
+
"step": 14548
|
| 101847 |
+
},
|
| 101848 |
+
{
|
| 101849 |
+
"epoch": 0.8082777777777778,
|
| 101850 |
+
"grad_norm": 0.1005859375,
|
| 101851 |
+
"learning_rate": 7.324062374660514e-05,
|
| 101852 |
+
"loss": 2.3255,
|
| 101853 |
+
"step": 14549
|
| 101854 |
+
},
|
| 101855 |
+
{
|
| 101856 |
+
"epoch": 0.8083333333333333,
|
| 101857 |
+
"grad_norm": 0.09912109375,
|
| 101858 |
+
"learning_rate": 7.31995402071104e-05,
|
| 101859 |
+
"loss": 2.3337,
|
| 101860 |
+
"step": 14550
|
| 101861 |
+
},
|
| 101862 |
+
{
|
| 101863 |
+
"epoch": 0.8083888888888889,
|
| 101864 |
+
"grad_norm": 0.10107421875,
|
| 101865 |
+
"learning_rate": 7.315846703299606e-05,
|
| 101866 |
+
"loss": 2.3872,
|
| 101867 |
+
"step": 14551
|
| 101868 |
+
},
|
| 101869 |
+
{
|
| 101870 |
+
"epoch": 0.8084444444444444,
|
| 101871 |
+
"grad_norm": 0.099609375,
|
| 101872 |
+
"learning_rate": 7.311740422556482e-05,
|
| 101873 |
+
"loss": 2.3763,
|
| 101874 |
+
"step": 14552
|
| 101875 |
+
},
|
| 101876 |
+
{
|
| 101877 |
+
"epoch": 0.8085,
|
| 101878 |
+
"grad_norm": 0.09814453125,
|
| 101879 |
+
"learning_rate": 7.307635178611896e-05,
|
| 101880 |
+
"loss": 2.3628,
|
| 101881 |
+
"step": 14553
|
| 101882 |
+
},
|
| 101883 |
+
{
|
| 101884 |
+
"epoch": 0.8085555555555556,
|
| 101885 |
+
"grad_norm": 0.0986328125,
|
| 101886 |
+
"learning_rate": 7.303530971596066e-05,
|
| 101887 |
+
"loss": 2.2737,
|
| 101888 |
+
"step": 14554
|
| 101889 |
+
},
|
| 101890 |
+
{
|
| 101891 |
+
"epoch": 0.8086111111111111,
|
| 101892 |
+
"grad_norm": 0.1005859375,
|
| 101893 |
+
"learning_rate": 7.299427801639182e-05,
|
| 101894 |
+
"loss": 2.3808,
|
| 101895 |
+
"step": 14555
|
| 101896 |
+
},
|
| 101897 |
+
{
|
| 101898 |
+
"epoch": 0.8086666666666666,
|
| 101899 |
+
"grad_norm": 0.09912109375,
|
| 101900 |
+
"learning_rate": 7.295325668871376e-05,
|
| 101901 |
+
"loss": 2.3389,
|
| 101902 |
+
"step": 14556
|
| 101903 |
+
},
|
| 101904 |
+
{
|
| 101905 |
+
"epoch": 0.8087222222222222,
|
| 101906 |
+
"grad_norm": 0.09814453125,
|
| 101907 |
+
"learning_rate": 7.291224573422746e-05,
|
| 101908 |
+
"loss": 2.3723,
|
| 101909 |
+
"step": 14557
|
| 101910 |
+
},
|
| 101911 |
+
{
|
| 101912 |
+
"epoch": 0.8087777777777778,
|
| 101913 |
+
"grad_norm": 0.1015625,
|
| 101914 |
+
"learning_rate": 7.287124515423394e-05,
|
| 101915 |
+
"loss": 2.4259,
|
| 101916 |
+
"step": 14558
|
| 101917 |
+
},
|
| 101918 |
+
{
|
| 101919 |
+
"epoch": 0.8088333333333333,
|
| 101920 |
+
"grad_norm": 0.09912109375,
|
| 101921 |
+
"learning_rate": 7.283025495003353e-05,
|
| 101922 |
+
"loss": 2.3539,
|
| 101923 |
+
"step": 14559
|
| 101924 |
+
},
|
| 101925 |
+
{
|
| 101926 |
+
"epoch": 0.8088888888888889,
|
| 101927 |
+
"grad_norm": 0.10009765625,
|
| 101928 |
+
"learning_rate": 7.278927512292621e-05,
|
| 101929 |
+
"loss": 2.3304,
|
| 101930 |
+
"step": 14560
|
| 101931 |
+
},
|
| 101932 |
+
{
|
| 101933 |
+
"epoch": 0.8089444444444445,
|
| 101934 |
+
"grad_norm": 0.1005859375,
|
| 101935 |
+
"learning_rate": 7.27483056742119e-05,
|
| 101936 |
+
"loss": 2.3769,
|
| 101937 |
+
"step": 14561
|
| 101938 |
+
},
|
| 101939 |
+
{
|
| 101940 |
+
"epoch": 0.809,
|
| 101941 |
+
"grad_norm": 0.099609375,
|
| 101942 |
+
"learning_rate": 7.270734660519028e-05,
|
| 101943 |
+
"loss": 2.3486,
|
| 101944 |
+
"step": 14562
|
| 101945 |
+
},
|
| 101946 |
+
{
|
| 101947 |
+
"epoch": 0.8090555555555555,
|
| 101948 |
+
"grad_norm": 0.10009765625,
|
| 101949 |
+
"learning_rate": 7.266639791716005e-05,
|
| 101950 |
+
"loss": 2.3934,
|
| 101951 |
+
"step": 14563
|
| 101952 |
+
},
|
| 101953 |
+
{
|
| 101954 |
+
"epoch": 0.8091111111111111,
|
| 101955 |
+
"grad_norm": 0.10009765625,
|
| 101956 |
+
"learning_rate": 7.262545961142024e-05,
|
| 101957 |
+
"loss": 2.3281,
|
| 101958 |
+
"step": 14564
|
| 101959 |
+
},
|
| 101960 |
+
{
|
| 101961 |
+
"epoch": 0.8091666666666667,
|
| 101962 |
+
"grad_norm": 0.1015625,
|
| 101963 |
+
"learning_rate": 7.25845316892694e-05,
|
| 101964 |
+
"loss": 2.4084,
|
| 101965 |
+
"step": 14565
|
| 101966 |
+
},
|
| 101967 |
+
{
|
| 101968 |
+
"epoch": 0.8092222222222222,
|
| 101969 |
+
"grad_norm": 0.099609375,
|
| 101970 |
+
"learning_rate": 7.254361415200559e-05,
|
| 101971 |
+
"loss": 2.3536,
|
| 101972 |
+
"step": 14566
|
| 101973 |
+
},
|
| 101974 |
+
{
|
| 101975 |
+
"epoch": 0.8092777777777778,
|
| 101976 |
+
"grad_norm": 0.09912109375,
|
| 101977 |
+
"learning_rate": 7.250270700092645e-05,
|
| 101978 |
+
"loss": 2.349,
|
| 101979 |
+
"step": 14567
|
| 101980 |
+
},
|
| 101981 |
+
{
|
| 101982 |
+
"epoch": 0.8093333333333333,
|
| 101983 |
+
"grad_norm": 0.0986328125,
|
| 101984 |
+
"learning_rate": 7.246181023732978e-05,
|
| 101985 |
+
"loss": 2.3395,
|
| 101986 |
+
"step": 14568
|
| 101987 |
+
},
|
| 101988 |
+
{
|
| 101989 |
+
"epoch": 0.8093888888888889,
|
| 101990 |
+
"grad_norm": 0.099609375,
|
| 101991 |
+
"learning_rate": 7.242092386251256e-05,
|
| 101992 |
+
"loss": 2.3649,
|
| 101993 |
+
"step": 14569
|
| 101994 |
+
},
|
| 101995 |
+
{
|
| 101996 |
+
"epoch": 0.8094444444444444,
|
| 101997 |
+
"grad_norm": 0.1005859375,
|
| 101998 |
+
"learning_rate": 7.238004787777151e-05,
|
| 101999 |
+
"loss": 2.3792,
|
| 102000 |
+
"step": 14570
|
| 102001 |
+
},
|
| 102002 |
+
{
|
| 102003 |
+
"epoch": 0.8095,
|
| 102004 |
+
"grad_norm": 0.1015625,
|
| 102005 |
+
"learning_rate": 7.233918228440324e-05,
|
| 102006 |
+
"loss": 2.3552,
|
| 102007 |
+
"step": 14571
|
| 102008 |
+
},
|
| 102009 |
+
{
|
| 102010 |
+
"epoch": 0.8095555555555556,
|
| 102011 |
+
"grad_norm": 0.09814453125,
|
| 102012 |
+
"learning_rate": 7.229832708370415e-05,
|
| 102013 |
+
"loss": 2.3548,
|
| 102014 |
+
"step": 14572
|
| 102015 |
+
},
|
| 102016 |
+
{
|
| 102017 |
+
"epoch": 0.8096111111111111,
|
| 102018 |
+
"grad_norm": 0.09912109375,
|
| 102019 |
+
"learning_rate": 7.225748227696962e-05,
|
| 102020 |
+
"loss": 2.3266,
|
| 102021 |
+
"step": 14573
|
| 102022 |
+
},
|
| 102023 |
+
{
|
| 102024 |
+
"epoch": 0.8096666666666666,
|
| 102025 |
+
"grad_norm": 0.09814453125,
|
| 102026 |
+
"learning_rate": 7.22166478654954e-05,
|
| 102027 |
+
"loss": 2.2877,
|
| 102028 |
+
"step": 14574
|
| 102029 |
+
},
|
| 102030 |
+
{
|
| 102031 |
+
"epoch": 0.8097222222222222,
|
| 102032 |
+
"grad_norm": 0.0986328125,
|
| 102033 |
+
"learning_rate": 7.217582385057675e-05,
|
| 102034 |
+
"loss": 2.2934,
|
| 102035 |
+
"step": 14575
|
| 102036 |
+
},
|
| 102037 |
+
{
|
| 102038 |
+
"epoch": 0.8097777777777778,
|
| 102039 |
+
"grad_norm": 0.1005859375,
|
| 102040 |
+
"learning_rate": 7.213501023350842e-05,
|
| 102041 |
+
"loss": 2.3059,
|
| 102042 |
+
"step": 14576
|
| 102043 |
+
},
|
| 102044 |
+
{
|
| 102045 |
+
"epoch": 0.8098333333333333,
|
| 102046 |
+
"grad_norm": 0.10009765625,
|
| 102047 |
+
"learning_rate": 7.209420701558479e-05,
|
| 102048 |
+
"loss": 2.3783,
|
| 102049 |
+
"step": 14577
|
| 102050 |
+
},
|
| 102051 |
+
{
|
| 102052 |
+
"epoch": 0.8098888888888889,
|
| 102053 |
+
"grad_norm": 0.1005859375,
|
| 102054 |
+
"learning_rate": 7.205341419810028e-05,
|
| 102055 |
+
"loss": 2.4268,
|
| 102056 |
+
"step": 14578
|
| 102057 |
+
},
|
| 102058 |
+
{
|
| 102059 |
+
"epoch": 0.8099444444444445,
|
| 102060 |
+
"grad_norm": 0.1005859375,
|
| 102061 |
+
"learning_rate": 7.201263178234858e-05,
|
| 102062 |
+
"loss": 2.4194,
|
| 102063 |
+
"step": 14579
|
| 102064 |
+
},
|
| 102065 |
+
{
|
| 102066 |
+
"epoch": 0.81,
|
| 102067 |
+
"grad_norm": 0.10009765625,
|
| 102068 |
+
"learning_rate": 7.197185976962337e-05,
|
| 102069 |
+
"loss": 2.3589,
|
| 102070 |
+
"step": 14580
|
| 102071 |
+
},
|
| 102072 |
+
{
|
| 102073 |
+
"epoch": 0.8100555555555555,
|
| 102074 |
+
"grad_norm": 0.099609375,
|
| 102075 |
+
"learning_rate": 7.193109816121761e-05,
|
| 102076 |
+
"loss": 2.3314,
|
| 102077 |
+
"step": 14581
|
| 102078 |
+
},
|
| 102079 |
+
{
|
| 102080 |
+
"epoch": 0.8101111111111111,
|
| 102081 |
+
"grad_norm": 0.09912109375,
|
| 102082 |
+
"learning_rate": 7.189034695842449e-05,
|
| 102083 |
+
"loss": 2.336,
|
| 102084 |
+
"step": 14582
|
| 102085 |
+
},
|
| 102086 |
+
{
|
| 102087 |
+
"epoch": 0.8101666666666667,
|
| 102088 |
+
"grad_norm": 0.09912109375,
|
| 102089 |
+
"learning_rate": 7.184960616253631e-05,
|
| 102090 |
+
"loss": 2.3114,
|
| 102091 |
+
"step": 14583
|
| 102092 |
+
},
|
| 102093 |
+
{
|
| 102094 |
+
"epoch": 0.8102222222222222,
|
| 102095 |
+
"grad_norm": 0.10107421875,
|
| 102096 |
+
"learning_rate": 7.180887577484528e-05,
|
| 102097 |
+
"loss": 2.422,
|
| 102098 |
+
"step": 14584
|
| 102099 |
+
},
|
| 102100 |
+
{
|
| 102101 |
+
"epoch": 0.8102777777777778,
|
| 102102 |
+
"grad_norm": 0.099609375,
|
| 102103 |
+
"learning_rate": 7.176815579664342e-05,
|
| 102104 |
+
"loss": 2.3149,
|
| 102105 |
+
"step": 14585
|
| 102106 |
+
},
|
| 102107 |
+
{
|
| 102108 |
+
"epoch": 0.8103333333333333,
|
| 102109 |
+
"grad_norm": 0.1015625,
|
| 102110 |
+
"learning_rate": 7.172744622922211e-05,
|
| 102111 |
+
"loss": 2.3686,
|
| 102112 |
+
"step": 14586
|
| 102113 |
+
},
|
| 102114 |
+
{
|
| 102115 |
+
"epoch": 0.8103888888888889,
|
| 102116 |
+
"grad_norm": 0.099609375,
|
| 102117 |
+
"learning_rate": 7.168674707387273e-05,
|
| 102118 |
+
"loss": 2.3624,
|
| 102119 |
+
"step": 14587
|
| 102120 |
+
},
|
| 102121 |
+
{
|
| 102122 |
+
"epoch": 0.8104444444444444,
|
| 102123 |
+
"grad_norm": 0.10302734375,
|
| 102124 |
+
"learning_rate": 7.164605833188614e-05,
|
| 102125 |
+
"loss": 2.4414,
|
| 102126 |
+
"step": 14588
|
| 102127 |
+
},
|
| 102128 |
+
{
|
| 102129 |
+
"epoch": 0.8105,
|
| 102130 |
+
"grad_norm": 0.09814453125,
|
| 102131 |
+
"learning_rate": 7.160538000455268e-05,
|
| 102132 |
+
"loss": 2.3085,
|
| 102133 |
+
"step": 14589
|
| 102134 |
+
},
|
| 102135 |
+
{
|
| 102136 |
+
"epoch": 0.8105555555555556,
|
| 102137 |
+
"grad_norm": 0.099609375,
|
| 102138 |
+
"learning_rate": 7.156471209316285e-05,
|
| 102139 |
+
"loss": 2.3599,
|
| 102140 |
+
"step": 14590
|
| 102141 |
+
},
|
| 102142 |
+
{
|
| 102143 |
+
"epoch": 0.8106111111111111,
|
| 102144 |
+
"grad_norm": 0.09814453125,
|
| 102145 |
+
"learning_rate": 7.152405459900635e-05,
|
| 102146 |
+
"loss": 2.2715,
|
| 102147 |
+
"step": 14591
|
| 102148 |
+
},
|
| 102149 |
+
{
|
| 102150 |
+
"epoch": 0.8106666666666666,
|
| 102151 |
+
"grad_norm": 0.1005859375,
|
| 102152 |
+
"learning_rate": 7.148340752337292e-05,
|
| 102153 |
+
"loss": 2.3563,
|
| 102154 |
+
"step": 14592
|
| 102155 |
+
},
|
| 102156 |
+
{
|
| 102157 |
+
"epoch": 0.8107222222222222,
|
| 102158 |
+
"grad_norm": 0.099609375,
|
| 102159 |
+
"learning_rate": 7.144277086755162e-05,
|
| 102160 |
+
"loss": 2.3484,
|
| 102161 |
+
"step": 14593
|
| 102162 |
+
},
|
| 102163 |
+
{
|
| 102164 |
+
"epoch": 0.8107777777777778,
|
| 102165 |
+
"grad_norm": 0.10302734375,
|
| 102166 |
+
"learning_rate": 7.14021446328315e-05,
|
| 102167 |
+
"loss": 2.433,
|
| 102168 |
+
"step": 14594
|
| 102169 |
+
},
|
| 102170 |
+
{
|
| 102171 |
+
"epoch": 0.8108333333333333,
|
| 102172 |
+
"grad_norm": 0.09912109375,
|
| 102173 |
+
"learning_rate": 7.136152882050108e-05,
|
| 102174 |
+
"loss": 2.2778,
|
| 102175 |
+
"step": 14595
|
| 102176 |
+
},
|
| 102177 |
+
{
|
| 102178 |
+
"epoch": 0.8108888888888889,
|
| 102179 |
+
"grad_norm": 0.09814453125,
|
| 102180 |
+
"learning_rate": 7.132092343184851e-05,
|
| 102181 |
+
"loss": 2.2624,
|
| 102182 |
+
"step": 14596
|
| 102183 |
+
},
|
| 102184 |
+
{
|
| 102185 |
+
"epoch": 0.8109444444444445,
|
| 102186 |
+
"grad_norm": 0.09765625,
|
| 102187 |
+
"learning_rate": 7.128032846816189e-05,
|
| 102188 |
+
"loss": 2.3192,
|
| 102189 |
+
"step": 14597
|
| 102190 |
+
},
|
| 102191 |
+
{
|
| 102192 |
+
"epoch": 0.811,
|
| 102193 |
+
"grad_norm": 0.09912109375,
|
| 102194 |
+
"learning_rate": 7.123974393072868e-05,
|
| 102195 |
+
"loss": 2.3167,
|
| 102196 |
+
"step": 14598
|
| 102197 |
+
},
|
| 102198 |
+
{
|
| 102199 |
+
"epoch": 0.8110555555555555,
|
| 102200 |
+
"grad_norm": 0.099609375,
|
| 102201 |
+
"learning_rate": 7.119916982083607e-05,
|
| 102202 |
+
"loss": 2.3443,
|
| 102203 |
+
"step": 14599
|
| 102204 |
+
},
|
| 102205 |
+
{
|
| 102206 |
+
"epoch": 0.8111111111111111,
|
| 102207 |
+
"grad_norm": 0.1005859375,
|
| 102208 |
+
"learning_rate": 7.115860613977105e-05,
|
| 102209 |
+
"loss": 2.4183,
|
| 102210 |
+
"step": 14600
|
| 102211 |
+
},
|
| 102212 |
+
{
|
| 102213 |
+
"epoch": 0.8111666666666667,
|
| 102214 |
+
"grad_norm": 0.0986328125,
|
| 102215 |
+
"learning_rate": 7.111805288882032e-05,
|
| 102216 |
+
"loss": 2.3577,
|
| 102217 |
+
"step": 14601
|
| 102218 |
+
},
|
| 102219 |
+
{
|
| 102220 |
+
"epoch": 0.8112222222222222,
|
| 102221 |
+
"grad_norm": 0.09912109375,
|
| 102222 |
+
"learning_rate": 7.107751006927e-05,
|
| 102223 |
+
"loss": 2.3745,
|
| 102224 |
+
"step": 14602
|
| 102225 |
+
},
|
| 102226 |
+
{
|
| 102227 |
+
"epoch": 0.8112777777777778,
|
| 102228 |
+
"grad_norm": 0.099609375,
|
| 102229 |
+
"learning_rate": 7.103697768240598e-05,
|
| 102230 |
+
"loss": 2.3525,
|
| 102231 |
+
"step": 14603
|
| 102232 |
+
},
|
| 102233 |
+
{
|
| 102234 |
+
"epoch": 0.8113333333333334,
|
| 102235 |
+
"grad_norm": 0.099609375,
|
| 102236 |
+
"learning_rate": 7.0996455729514e-05,
|
| 102237 |
+
"loss": 2.345,
|
| 102238 |
+
"step": 14604
|
| 102239 |
+
},
|
| 102240 |
+
{
|
| 102241 |
+
"epoch": 0.8113888888888889,
|
| 102242 |
+
"grad_norm": 0.09912109375,
|
| 102243 |
+
"learning_rate": 7.095594421187933e-05,
|
| 102244 |
+
"loss": 2.3176,
|
| 102245 |
+
"step": 14605
|
| 102246 |
+
},
|
| 102247 |
+
{
|
| 102248 |
+
"epoch": 0.8114444444444444,
|
| 102249 |
+
"grad_norm": 0.1005859375,
|
| 102250 |
+
"learning_rate": 7.091544313078666e-05,
|
| 102251 |
+
"loss": 2.4282,
|
| 102252 |
+
"step": 14606
|
| 102253 |
+
},
|
| 102254 |
+
{
|
| 102255 |
+
"epoch": 0.8115,
|
| 102256 |
+
"grad_norm": 0.1005859375,
|
| 102257 |
+
"learning_rate": 7.087495248752078e-05,
|
| 102258 |
+
"loss": 2.4332,
|
| 102259 |
+
"step": 14607
|
| 102260 |
+
},
|
| 102261 |
+
{
|
| 102262 |
+
"epoch": 0.8115555555555556,
|
| 102263 |
+
"grad_norm": 0.10498046875,
|
| 102264 |
+
"learning_rate": 7.083447228336618e-05,
|
| 102265 |
+
"loss": 2.3099,
|
| 102266 |
+
"step": 14608
|
| 102267 |
+
},
|
| 102268 |
+
{
|
| 102269 |
+
"epoch": 0.8116111111111111,
|
| 102270 |
+
"grad_norm": 0.099609375,
|
| 102271 |
+
"learning_rate": 7.079400251960632e-05,
|
| 102272 |
+
"loss": 2.3461,
|
| 102273 |
+
"step": 14609
|
| 102274 |
+
},
|
| 102275 |
+
{
|
| 102276 |
+
"epoch": 0.8116666666666666,
|
| 102277 |
+
"grad_norm": 0.10009765625,
|
| 102278 |
+
"learning_rate": 7.075354319752508e-05,
|
| 102279 |
+
"loss": 2.3172,
|
| 102280 |
+
"step": 14610
|
| 102281 |
+
},
|
| 102282 |
+
{
|
| 102283 |
+
"epoch": 0.8117222222222222,
|
| 102284 |
+
"grad_norm": 0.09912109375,
|
| 102285 |
+
"learning_rate": 7.071309431840583e-05,
|
| 102286 |
+
"loss": 2.3326,
|
| 102287 |
+
"step": 14611
|
| 102288 |
+
},
|
| 102289 |
+
{
|
| 102290 |
+
"epoch": 0.8117777777777778,
|
| 102291 |
+
"grad_norm": 0.099609375,
|
| 102292 |
+
"learning_rate": 7.067265588353142e-05,
|
| 102293 |
+
"loss": 2.3294,
|
| 102294 |
+
"step": 14612
|
| 102295 |
+
},
|
| 102296 |
+
{
|
| 102297 |
+
"epoch": 0.8118333333333333,
|
| 102298 |
+
"grad_norm": 0.1005859375,
|
| 102299 |
+
"learning_rate": 7.063222789418432e-05,
|
| 102300 |
+
"loss": 2.3677,
|
| 102301 |
+
"step": 14613
|
| 102302 |
+
},
|
| 102303 |
+
{
|
| 102304 |
+
"epoch": 0.8118888888888889,
|
| 102305 |
+
"grad_norm": 0.10107421875,
|
| 102306 |
+
"learning_rate": 7.059181035164697e-05,
|
| 102307 |
+
"loss": 2.4053,
|
| 102308 |
+
"step": 14614
|
| 102309 |
+
},
|
| 102310 |
+
{
|
| 102311 |
+
"epoch": 0.8119444444444445,
|
| 102312 |
+
"grad_norm": 0.099609375,
|
| 102313 |
+
"learning_rate": 7.055140325720148e-05,
|
| 102314 |
+
"loss": 2.3342,
|
| 102315 |
+
"step": 14615
|
| 102316 |
+
},
|
| 102317 |
+
{
|
| 102318 |
+
"epoch": 0.812,
|
| 102319 |
+
"grad_norm": 0.0986328125,
|
| 102320 |
+
"learning_rate": 7.05110066121291e-05,
|
| 102321 |
+
"loss": 2.2951,
|
| 102322 |
+
"step": 14616
|
| 102323 |
+
},
|
| 102324 |
+
{
|
| 102325 |
+
"epoch": 0.8120555555555555,
|
| 102326 |
+
"grad_norm": 0.099609375,
|
| 102327 |
+
"learning_rate": 7.047062041771133e-05,
|
| 102328 |
+
"loss": 2.3406,
|
| 102329 |
+
"step": 14617
|
| 102330 |
+
},
|
| 102331 |
+
{
|
| 102332 |
+
"epoch": 0.8121111111111111,
|
| 102333 |
+
"grad_norm": 0.09912109375,
|
| 102334 |
+
"learning_rate": 7.043024467522923e-05,
|
| 102335 |
+
"loss": 2.3376,
|
| 102336 |
+
"step": 14618
|
| 102337 |
+
},
|
| 102338 |
+
{
|
| 102339 |
+
"epoch": 0.8121666666666667,
|
| 102340 |
+
"grad_norm": 0.099609375,
|
| 102341 |
+
"learning_rate": 7.038987938596329e-05,
|
| 102342 |
+
"loss": 2.4329,
|
| 102343 |
+
"step": 14619
|
| 102344 |
+
},
|
| 102345 |
+
{
|
| 102346 |
+
"epoch": 0.8122222222222222,
|
| 102347 |
+
"grad_norm": 0.10107421875,
|
| 102348 |
+
"learning_rate": 7.034952455119373e-05,
|
| 102349 |
+
"loss": 2.3435,
|
| 102350 |
+
"step": 14620
|
| 102351 |
+
},
|
| 102352 |
+
{
|
| 102353 |
+
"epoch": 0.8122777777777778,
|
| 102354 |
+
"grad_norm": 0.0986328125,
|
| 102355 |
+
"learning_rate": 7.030918017220072e-05,
|
| 102356 |
+
"loss": 2.314,
|
| 102357 |
+
"step": 14621
|
| 102358 |
+
},
|
| 102359 |
+
{
|
| 102360 |
+
"epoch": 0.8123333333333334,
|
| 102361 |
+
"grad_norm": 0.099609375,
|
| 102362 |
+
"learning_rate": 7.026884625026382e-05,
|
| 102363 |
+
"loss": 2.3018,
|
| 102364 |
+
"step": 14622
|
| 102365 |
+
},
|
| 102366 |
+
{
|
| 102367 |
+
"epoch": 0.8123888888888889,
|
| 102368 |
+
"grad_norm": 0.099609375,
|
| 102369 |
+
"learning_rate": 7.022852278666214e-05,
|
| 102370 |
+
"loss": 2.357,
|
| 102371 |
+
"step": 14623
|
| 102372 |
+
},
|
| 102373 |
+
{
|
| 102374 |
+
"epoch": 0.8124444444444444,
|
| 102375 |
+
"grad_norm": 0.10009765625,
|
| 102376 |
+
"learning_rate": 7.018820978267489e-05,
|
| 102377 |
+
"loss": 2.3692,
|
| 102378 |
+
"step": 14624
|
| 102379 |
+
},
|
| 102380 |
+
{
|
| 102381 |
+
"epoch": 0.8125,
|
| 102382 |
+
"grad_norm": 0.10107421875,
|
| 102383 |
+
"learning_rate": 7.014790723958076e-05,
|
| 102384 |
+
"loss": 2.4414,
|
| 102385 |
+
"step": 14625
|
| 102386 |
+
},
|
| 102387 |
+
{
|
| 102388 |
+
"epoch": 0.8125555555555556,
|
| 102389 |
+
"grad_norm": 0.1015625,
|
| 102390 |
+
"learning_rate": 7.010761515865776e-05,
|
| 102391 |
+
"loss": 2.3936,
|
| 102392 |
+
"step": 14626
|
| 102393 |
+
},
|
| 102394 |
+
{
|
| 102395 |
+
"epoch": 0.8126111111111111,
|
| 102396 |
+
"grad_norm": 0.09814453125,
|
| 102397 |
+
"learning_rate": 7.006733354118407e-05,
|
| 102398 |
+
"loss": 2.3035,
|
| 102399 |
+
"step": 14627
|
| 102400 |
+
},
|
| 102401 |
+
{
|
| 102402 |
+
"epoch": 0.8126666666666666,
|
| 102403 |
+
"grad_norm": 0.09814453125,
|
| 102404 |
+
"learning_rate": 7.002706238843732e-05,
|
| 102405 |
+
"loss": 2.3139,
|
| 102406 |
+
"step": 14628
|
| 102407 |
+
},
|
| 102408 |
+
{
|
| 102409 |
+
"epoch": 0.8127222222222222,
|
| 102410 |
+
"grad_norm": 0.0986328125,
|
| 102411 |
+
"learning_rate": 6.998680170169482e-05,
|
| 102412 |
+
"loss": 2.3617,
|
| 102413 |
+
"step": 14629
|
| 102414 |
+
},
|
| 102415 |
+
{
|
| 102416 |
+
"epoch": 0.8127777777777778,
|
| 102417 |
+
"grad_norm": 0.09912109375,
|
| 102418 |
+
"learning_rate": 6.994655148223345e-05,
|
| 102419 |
+
"loss": 2.2833,
|
| 102420 |
+
"step": 14630
|
| 102421 |
+
},
|
| 102422 |
+
{
|
| 102423 |
+
"epoch": 0.8128333333333333,
|
| 102424 |
+
"grad_norm": 0.099609375,
|
| 102425 |
+
"learning_rate": 6.990631173133002e-05,
|
| 102426 |
+
"loss": 2.4251,
|
| 102427 |
+
"step": 14631
|
| 102428 |
+
},
|
| 102429 |
+
{
|
| 102430 |
+
"epoch": 0.8128888888888889,
|
| 102431 |
+
"grad_norm": 0.1015625,
|
| 102432 |
+
"learning_rate": 6.986608245026078e-05,
|
| 102433 |
+
"loss": 2.3858,
|
| 102434 |
+
"step": 14632
|
| 102435 |
+
},
|
| 102436 |
+
{
|
| 102437 |
+
"epoch": 0.8129444444444445,
|
| 102438 |
+
"grad_norm": 0.10009765625,
|
| 102439 |
+
"learning_rate": 6.982586364030157e-05,
|
| 102440 |
+
"loss": 2.3461,
|
| 102441 |
+
"step": 14633
|
| 102442 |
+
},
|
| 102443 |
+
{
|
| 102444 |
+
"epoch": 0.813,
|
| 102445 |
+
"grad_norm": 0.10009765625,
|
| 102446 |
+
"learning_rate": 6.978565530272816e-05,
|
| 102447 |
+
"loss": 2.3558,
|
| 102448 |
+
"step": 14634
|
| 102449 |
+
},
|
| 102450 |
+
{
|
| 102451 |
+
"epoch": 0.8130555555555555,
|
| 102452 |
+
"grad_norm": 0.1005859375,
|
| 102453 |
+
"learning_rate": 6.974545743881597e-05,
|
| 102454 |
+
"loss": 2.2861,
|
| 102455 |
+
"step": 14635
|
| 102456 |
+
},
|
| 102457 |
+
{
|
| 102458 |
+
"epoch": 0.8131111111111111,
|
| 102459 |
+
"grad_norm": 0.099609375,
|
| 102460 |
+
"learning_rate": 6.970527004983987e-05,
|
| 102461 |
+
"loss": 2.3258,
|
| 102462 |
+
"step": 14636
|
| 102463 |
+
},
|
| 102464 |
+
{
|
| 102465 |
+
"epoch": 0.8131666666666667,
|
| 102466 |
+
"grad_norm": 0.1005859375,
|
| 102467 |
+
"learning_rate": 6.966509313707446e-05,
|
| 102468 |
+
"loss": 2.4054,
|
| 102469 |
+
"step": 14637
|
| 102470 |
+
},
|
| 102471 |
+
{
|
| 102472 |
+
"epoch": 0.8132222222222222,
|
| 102473 |
+
"grad_norm": 0.09912109375,
|
| 102474 |
+
"learning_rate": 6.96249267017942e-05,
|
| 102475 |
+
"loss": 2.33,
|
| 102476 |
+
"step": 14638
|
| 102477 |
+
},
|
| 102478 |
+
{
|
| 102479 |
+
"epoch": 0.8132777777777778,
|
| 102480 |
+
"grad_norm": 0.09814453125,
|
| 102481 |
+
"learning_rate": 6.958477074527303e-05,
|
| 102482 |
+
"loss": 2.3096,
|
| 102483 |
+
"step": 14639
|
| 102484 |
+
},
|
| 102485 |
+
{
|
| 102486 |
+
"epoch": 0.8133333333333334,
|
| 102487 |
+
"grad_norm": 0.1005859375,
|
| 102488 |
+
"learning_rate": 6.954462526878449e-05,
|
| 102489 |
+
"loss": 2.2891,
|
| 102490 |
+
"step": 14640
|
| 102491 |
+
},
|
| 102492 |
+
{
|
| 102493 |
+
"epoch": 0.8133888888888889,
|
| 102494 |
+
"grad_norm": 0.10009765625,
|
| 102495 |
+
"learning_rate": 6.950449027360213e-05,
|
| 102496 |
+
"loss": 2.3006,
|
| 102497 |
+
"step": 14641
|
| 102498 |
+
},
|
| 102499 |
+
{
|
| 102500 |
+
"epoch": 0.8134444444444444,
|
| 102501 |
+
"grad_norm": 0.09912109375,
|
| 102502 |
+
"learning_rate": 6.946436576099866e-05,
|
| 102503 |
+
"loss": 2.3279,
|
| 102504 |
+
"step": 14642
|
| 102505 |
+
},
|
| 102506 |
+
{
|
| 102507 |
+
"epoch": 0.8135,
|
| 102508 |
+
"grad_norm": 0.10009765625,
|
| 102509 |
+
"learning_rate": 6.942425173224703e-05,
|
| 102510 |
+
"loss": 2.3861,
|
| 102511 |
+
"step": 14643
|
| 102512 |
+
},
|
| 102513 |
+
{
|
| 102514 |
+
"epoch": 0.8135555555555556,
|
| 102515 |
+
"grad_norm": 0.10107421875,
|
| 102516 |
+
"learning_rate": 6.938414818861932e-05,
|
| 102517 |
+
"loss": 2.4212,
|
| 102518 |
+
"step": 14644
|
| 102519 |
+
},
|
| 102520 |
+
{
|
| 102521 |
+
"epoch": 0.8136111111111111,
|
| 102522 |
+
"grad_norm": 0.09814453125,
|
| 102523 |
+
"learning_rate": 6.934405513138776e-05,
|
| 102524 |
+
"loss": 2.3887,
|
| 102525 |
+
"step": 14645
|
| 102526 |
+
},
|
| 102527 |
+
{
|
| 102528 |
+
"epoch": 0.8136666666666666,
|
| 102529 |
+
"grad_norm": 0.1005859375,
|
| 102530 |
+
"learning_rate": 6.930397256182386e-05,
|
| 102531 |
+
"loss": 2.3357,
|
| 102532 |
+
"step": 14646
|
| 102533 |
+
},
|
| 102534 |
+
{
|
| 102535 |
+
"epoch": 0.8137222222222222,
|
| 102536 |
+
"grad_norm": 0.09912109375,
|
| 102537 |
+
"learning_rate": 6.926390048119889e-05,
|
| 102538 |
+
"loss": 2.3446,
|
| 102539 |
+
"step": 14647
|
| 102540 |
+
},
|
| 102541 |
+
{
|
| 102542 |
+
"epoch": 0.8137777777777778,
|
| 102543 |
+
"grad_norm": 0.09814453125,
|
| 102544 |
+
"learning_rate": 6.922383889078408e-05,
|
| 102545 |
+
"loss": 2.3294,
|
| 102546 |
+
"step": 14648
|
| 102547 |
+
},
|
| 102548 |
+
{
|
| 102549 |
+
"epoch": 0.8138333333333333,
|
| 102550 |
+
"grad_norm": 0.10205078125,
|
| 102551 |
+
"learning_rate": 6.91837877918498e-05,
|
| 102552 |
+
"loss": 2.4317,
|
| 102553 |
+
"step": 14649
|
| 102554 |
+
},
|
| 102555 |
+
{
|
| 102556 |
+
"epoch": 0.8138888888888889,
|
| 102557 |
+
"grad_norm": 0.099609375,
|
| 102558 |
+
"learning_rate": 6.914374718566664e-05,
|
| 102559 |
+
"loss": 2.4236,
|
| 102560 |
+
"step": 14650
|
| 102561 |
+
},
|
| 102562 |
+
{
|
| 102563 |
+
"epoch": 0.8139444444444445,
|
| 102564 |
+
"grad_norm": 0.10009765625,
|
| 102565 |
+
"learning_rate": 6.910371707350449e-05,
|
| 102566 |
+
"loss": 2.3637,
|
| 102567 |
+
"step": 14651
|
| 102568 |
+
},
|
| 102569 |
+
{
|
| 102570 |
+
"epoch": 0.814,
|
| 102571 |
+
"grad_norm": 0.09814453125,
|
| 102572 |
+
"learning_rate": 6.90636974566329e-05,
|
| 102573 |
+
"loss": 2.3308,
|
| 102574 |
+
"step": 14652
|
| 102575 |
+
},
|
| 102576 |
+
{
|
| 102577 |
+
"epoch": 0.8140555555555555,
|
| 102578 |
+
"grad_norm": 0.0986328125,
|
| 102579 |
+
"learning_rate": 6.902368833632147e-05,
|
| 102580 |
+
"loss": 2.347,
|
| 102581 |
+
"step": 14653
|
| 102582 |
+
},
|
| 102583 |
+
{
|
| 102584 |
+
"epoch": 0.8141111111111111,
|
| 102585 |
+
"grad_norm": 0.09814453125,
|
| 102586 |
+
"learning_rate": 6.898368971383891e-05,
|
| 102587 |
+
"loss": 2.3057,
|
| 102588 |
+
"step": 14654
|
| 102589 |
+
},
|
| 102590 |
+
{
|
| 102591 |
+
"epoch": 0.8141666666666667,
|
| 102592 |
+
"grad_norm": 0.1005859375,
|
| 102593 |
+
"learning_rate": 6.894370159045412e-05,
|
| 102594 |
+
"loss": 2.3419,
|
| 102595 |
+
"step": 14655
|
| 102596 |
+
},
|
| 102597 |
+
{
|
| 102598 |
+
"epoch": 0.8142222222222222,
|
| 102599 |
+
"grad_norm": 0.10107421875,
|
| 102600 |
+
"learning_rate": 6.890372396743528e-05,
|
| 102601 |
+
"loss": 2.4309,
|
| 102602 |
+
"step": 14656
|
| 102603 |
+
},
|
| 102604 |
+
{
|
| 102605 |
+
"epoch": 0.8142777777777778,
|
| 102606 |
+
"grad_norm": 0.0986328125,
|
| 102607 |
+
"learning_rate": 6.886375684605053e-05,
|
| 102608 |
+
"loss": 2.3099,
|
| 102609 |
+
"step": 14657
|
| 102610 |
+
},
|
| 102611 |
+
{
|
| 102612 |
+
"epoch": 0.8143333333333334,
|
| 102613 |
+
"grad_norm": 0.099609375,
|
| 102614 |
+
"learning_rate": 6.882380022756745e-05,
|
| 102615 |
+
"loss": 2.2935,
|
| 102616 |
+
"step": 14658
|
| 102617 |
+
},
|
| 102618 |
+
{
|
| 102619 |
+
"epoch": 0.8143888888888889,
|
| 102620 |
+
"grad_norm": 0.0986328125,
|
| 102621 |
+
"learning_rate": 6.878385411325328e-05,
|
| 102622 |
+
"loss": 2.301,
|
| 102623 |
+
"step": 14659
|
| 102624 |
+
},
|
| 102625 |
+
{
|
| 102626 |
+
"epoch": 0.8144444444444444,
|
| 102627 |
+
"grad_norm": 0.0986328125,
|
| 102628 |
+
"learning_rate": 6.874391850437514e-05,
|
| 102629 |
+
"loss": 2.3064,
|
| 102630 |
+
"step": 14660
|
| 102631 |
+
},
|
| 102632 |
+
{
|
| 102633 |
+
"epoch": 0.8145,
|
| 102634 |
+
"grad_norm": 0.09912109375,
|
| 102635 |
+
"learning_rate": 6.870399340219988e-05,
|
| 102636 |
+
"loss": 2.2995,
|
| 102637 |
+
"step": 14661
|
| 102638 |
+
},
|
| 102639 |
+
{
|
| 102640 |
+
"epoch": 0.8145555555555556,
|
| 102641 |
+
"grad_norm": 0.09814453125,
|
| 102642 |
+
"learning_rate": 6.866407880799343e-05,
|
| 102643 |
+
"loss": 2.3258,
|
| 102644 |
+
"step": 14662
|
| 102645 |
+
},
|
| 102646 |
+
{
|
| 102647 |
+
"epoch": 0.8146111111111111,
|
| 102648 |
+
"grad_norm": 0.099609375,
|
| 102649 |
+
"learning_rate": 6.862417472302198e-05,
|
| 102650 |
+
"loss": 2.4894,
|
| 102651 |
+
"step": 14663
|
| 102652 |
+
},
|
| 102653 |
+
{
|
| 102654 |
+
"epoch": 0.8146666666666667,
|
| 102655 |
+
"grad_norm": 0.099609375,
|
| 102656 |
+
"learning_rate": 6.858428114855136e-05,
|
| 102657 |
+
"loss": 2.3617,
|
| 102658 |
+
"step": 14664
|
| 102659 |
+
},
|
| 102660 |
+
{
|
| 102661 |
+
"epoch": 0.8147222222222222,
|
| 102662 |
+
"grad_norm": 0.1005859375,
|
| 102663 |
+
"learning_rate": 6.854439808584672e-05,
|
| 102664 |
+
"loss": 2.3531,
|
| 102665 |
+
"step": 14665
|
| 102666 |
+
},
|
| 102667 |
+
{
|
| 102668 |
+
"epoch": 0.8147777777777778,
|
| 102669 |
+
"grad_norm": 0.099609375,
|
| 102670 |
+
"learning_rate": 6.850452553617306e-05,
|
| 102671 |
+
"loss": 2.2911,
|
| 102672 |
+
"step": 14666
|
| 102673 |
+
},
|
| 102674 |
+
{
|
| 102675 |
+
"epoch": 0.8148333333333333,
|
| 102676 |
+
"grad_norm": 0.10009765625,
|
| 102677 |
+
"learning_rate": 6.846466350079515e-05,
|
| 102678 |
+
"loss": 2.2877,
|
| 102679 |
+
"step": 14667
|
| 102680 |
+
},
|
| 102681 |
+
{
|
| 102682 |
+
"epoch": 0.8148888888888889,
|
| 102683 |
+
"grad_norm": 0.1005859375,
|
| 102684 |
+
"learning_rate": 6.84248119809773e-05,
|
| 102685 |
+
"loss": 2.4211,
|
| 102686 |
+
"step": 14668
|
| 102687 |
+
},
|
| 102688 |
+
{
|
| 102689 |
+
"epoch": 0.8149444444444445,
|
| 102690 |
+
"grad_norm": 0.1005859375,
|
| 102691 |
+
"learning_rate": 6.838497097798335e-05,
|
| 102692 |
+
"loss": 2.3843,
|
| 102693 |
+
"step": 14669
|
| 102694 |
+
},
|
| 102695 |
+
{
|
| 102696 |
+
"epoch": 0.815,
|
| 102697 |
+
"grad_norm": 0.09912109375,
|
| 102698 |
+
"learning_rate": 6.83451404930771e-05,
|
| 102699 |
+
"loss": 2.3604,
|
| 102700 |
+
"step": 14670
|
| 102701 |
+
},
|
| 102702 |
+
{
|
| 102703 |
+
"epoch": 0.8150555555555555,
|
| 102704 |
+
"grad_norm": 0.09814453125,
|
| 102705 |
+
"learning_rate": 6.83053205275221e-05,
|
| 102706 |
+
"loss": 2.3164,
|
| 102707 |
+
"step": 14671
|
| 102708 |
+
},
|
| 102709 |
+
{
|
| 102710 |
+
"epoch": 0.8151111111111111,
|
| 102711 |
+
"grad_norm": 0.0986328125,
|
| 102712 |
+
"learning_rate": 6.82655110825809e-05,
|
| 102713 |
+
"loss": 2.2948,
|
| 102714 |
+
"step": 14672
|
| 102715 |
+
},
|
| 102716 |
+
{
|
| 102717 |
+
"epoch": 0.8151666666666667,
|
| 102718 |
+
"grad_norm": 0.09814453125,
|
| 102719 |
+
"learning_rate": 6.822571215951641e-05,
|
| 102720 |
+
"loss": 2.3123,
|
| 102721 |
+
"step": 14673
|
| 102722 |
+
},
|
| 102723 |
+
{
|
| 102724 |
+
"epoch": 0.8152222222222222,
|
| 102725 |
+
"grad_norm": 0.10009765625,
|
| 102726 |
+
"learning_rate": 6.818592375959104e-05,
|
| 102727 |
+
"loss": 2.3954,
|
| 102728 |
+
"step": 14674
|
| 102729 |
+
},
|
| 102730 |
+
{
|
| 102731 |
+
"epoch": 0.8152777777777778,
|
| 102732 |
+
"grad_norm": 0.10009765625,
|
| 102733 |
+
"learning_rate": 6.814614588406674e-05,
|
| 102734 |
+
"loss": 2.3927,
|
| 102735 |
+
"step": 14675
|
| 102736 |
+
},
|
| 102737 |
+
{
|
| 102738 |
+
"epoch": 0.8153333333333334,
|
| 102739 |
+
"grad_norm": 0.1005859375,
|
| 102740 |
+
"learning_rate": 6.8106378534205e-05,
|
| 102741 |
+
"loss": 2.3519,
|
| 102742 |
+
"step": 14676
|
| 102743 |
+
},
|
| 102744 |
+
{
|
| 102745 |
+
"epoch": 0.8153888888888889,
|
| 102746 |
+
"grad_norm": 0.09912109375,
|
| 102747 |
+
"learning_rate": 6.806662171126741e-05,
|
| 102748 |
+
"loss": 2.3356,
|
| 102749 |
+
"step": 14677
|
| 102750 |
+
},
|
| 102751 |
+
{
|
| 102752 |
+
"epoch": 0.8154444444444444,
|
| 102753 |
+
"grad_norm": 0.10107421875,
|
| 102754 |
+
"learning_rate": 6.802687541651481e-05,
|
| 102755 |
+
"loss": 2.3303,
|
| 102756 |
+
"step": 14678
|
| 102757 |
+
},
|
| 102758 |
+
{
|
| 102759 |
+
"epoch": 0.8155,
|
| 102760 |
+
"grad_norm": 0.09814453125,
|
| 102761 |
+
"learning_rate": 6.798713965120782e-05,
|
| 102762 |
+
"loss": 2.2965,
|
| 102763 |
+
"step": 14679
|
| 102764 |
+
},
|
| 102765 |
+
{
|
| 102766 |
+
"epoch": 0.8155555555555556,
|
| 102767 |
+
"grad_norm": 0.09912109375,
|
| 102768 |
+
"learning_rate": 6.794741441660683e-05,
|
| 102769 |
+
"loss": 2.3209,
|
| 102770 |
+
"step": 14680
|
| 102771 |
+
},
|
| 102772 |
+
{
|
| 102773 |
+
"epoch": 0.8156111111111111,
|
| 102774 |
+
"grad_norm": 0.10205078125,
|
| 102775 |
+
"learning_rate": 6.790769971397196e-05,
|
| 102776 |
+
"loss": 2.4052,
|
| 102777 |
+
"step": 14681
|
| 102778 |
+
},
|
| 102779 |
+
{
|
| 102780 |
+
"epoch": 0.8156666666666667,
|
| 102781 |
+
"grad_norm": 0.099609375,
|
| 102782 |
+
"learning_rate": 6.786799554456278e-05,
|
| 102783 |
+
"loss": 2.3653,
|
| 102784 |
+
"step": 14682
|
| 102785 |
+
},
|
| 102786 |
+
{
|
| 102787 |
+
"epoch": 0.8157222222222222,
|
| 102788 |
+
"grad_norm": 0.09912109375,
|
| 102789 |
+
"learning_rate": 6.782830190963849e-05,
|
| 102790 |
+
"loss": 2.3078,
|
| 102791 |
+
"step": 14683
|
| 102792 |
+
},
|
| 102793 |
+
{
|
| 102794 |
+
"epoch": 0.8157777777777778,
|
| 102795 |
+
"grad_norm": 0.09912109375,
|
| 102796 |
+
"learning_rate": 6.77886188104583e-05,
|
| 102797 |
+
"loss": 2.3082,
|
| 102798 |
+
"step": 14684
|
| 102799 |
+
},
|
| 102800 |
+
{
|
| 102801 |
+
"epoch": 0.8158333333333333,
|
| 102802 |
+
"grad_norm": 0.099609375,
|
| 102803 |
+
"learning_rate": 6.774894624828077e-05,
|
| 102804 |
+
"loss": 2.3264,
|
| 102805 |
+
"step": 14685
|
| 102806 |
+
},
|
| 102807 |
+
{
|
| 102808 |
+
"epoch": 0.8158888888888889,
|
| 102809 |
+
"grad_norm": 0.09912109375,
|
| 102810 |
+
"learning_rate": 6.770928422436411e-05,
|
| 102811 |
+
"loss": 2.3505,
|
| 102812 |
+
"step": 14686
|
| 102813 |
+
},
|
| 102814 |
+
{
|
| 102815 |
+
"epoch": 0.8159444444444445,
|
| 102816 |
+
"grad_norm": 0.10107421875,
|
| 102817 |
+
"learning_rate": 6.766963273996649e-05,
|
| 102818 |
+
"loss": 2.427,
|
| 102819 |
+
"step": 14687
|
| 102820 |
+
},
|
| 102821 |
+
{
|
| 102822 |
+
"epoch": 0.816,
|
| 102823 |
+
"grad_norm": 0.099609375,
|
| 102824 |
+
"learning_rate": 6.762999179634544e-05,
|
| 102825 |
+
"loss": 2.3383,
|
| 102826 |
+
"step": 14688
|
| 102827 |
+
},
|
| 102828 |
+
{
|
| 102829 |
+
"epoch": 0.8160555555555555,
|
| 102830 |
+
"grad_norm": 0.0986328125,
|
| 102831 |
+
"learning_rate": 6.759036139475843e-05,
|
| 102832 |
+
"loss": 2.3603,
|
| 102833 |
+
"step": 14689
|
| 102834 |
+
},
|
| 102835 |
+
{
|
| 102836 |
+
"epoch": 0.8161111111111111,
|
| 102837 |
+
"grad_norm": 0.099609375,
|
| 102838 |
+
"learning_rate": 6.755074153646224e-05,
|
| 102839 |
+
"loss": 2.369,
|
| 102840 |
+
"step": 14690
|
| 102841 |
+
},
|
| 102842 |
+
{
|
| 102843 |
+
"epoch": 0.8161666666666667,
|
| 102844 |
+
"grad_norm": 0.09814453125,
|
| 102845 |
+
"learning_rate": 6.751113222271377e-05,
|
| 102846 |
+
"loss": 2.3277,
|
| 102847 |
+
"step": 14691
|
| 102848 |
+
},
|
| 102849 |
+
{
|
| 102850 |
+
"epoch": 0.8162222222222222,
|
| 102851 |
+
"grad_norm": 0.09912109375,
|
| 102852 |
+
"learning_rate": 6.747153345476917e-05,
|
| 102853 |
+
"loss": 2.3063,
|
| 102854 |
+
"step": 14692
|
| 102855 |
+
},
|
| 102856 |
+
{
|
| 102857 |
+
"epoch": 0.8162777777777778,
|
| 102858 |
+
"grad_norm": 0.099609375,
|
| 102859 |
+
"learning_rate": 6.743194523388444e-05,
|
| 102860 |
+
"loss": 2.4056,
|
| 102861 |
+
"step": 14693
|
| 102862 |
+
},
|
| 102863 |
+
{
|
| 102864 |
+
"epoch": 0.8163333333333334,
|
| 102865 |
+
"grad_norm": 0.1015625,
|
| 102866 |
+
"learning_rate": 6.739236756131528e-05,
|
| 102867 |
+
"loss": 2.4214,
|
| 102868 |
+
"step": 14694
|
| 102869 |
+
},
|
| 102870 |
+
{
|
| 102871 |
+
"epoch": 0.8163888888888889,
|
| 102872 |
+
"grad_norm": 0.1005859375,
|
| 102873 |
+
"learning_rate": 6.735280043831687e-05,
|
| 102874 |
+
"loss": 2.3582,
|
| 102875 |
+
"step": 14695
|
| 102876 |
+
},
|
| 102877 |
+
{
|
| 102878 |
+
"epoch": 0.8164444444444444,
|
| 102879 |
+
"grad_norm": 0.09814453125,
|
| 102880 |
+
"learning_rate": 6.731324386614444e-05,
|
| 102881 |
+
"loss": 2.3456,
|
| 102882 |
+
"step": 14696
|
| 102883 |
+
},
|
| 102884 |
+
{
|
| 102885 |
+
"epoch": 0.8165,
|
| 102886 |
+
"grad_norm": 0.1015625,
|
| 102887 |
+
"learning_rate": 6.727369784605251e-05,
|
| 102888 |
+
"loss": 2.3552,
|
| 102889 |
+
"step": 14697
|
| 102890 |
+
},
|
| 102891 |
+
{
|
| 102892 |
+
"epoch": 0.8165555555555556,
|
| 102893 |
+
"grad_norm": 0.09912109375,
|
| 102894 |
+
"learning_rate": 6.723416237929523e-05,
|
| 102895 |
+
"loss": 2.2918,
|
| 102896 |
+
"step": 14698
|
| 102897 |
+
},
|
| 102898 |
+
{
|
| 102899 |
+
"epoch": 0.8166111111111111,
|
| 102900 |
+
"grad_norm": 0.09765625,
|
| 102901 |
+
"learning_rate": 6.719463746712684e-05,
|
| 102902 |
+
"loss": 2.374,
|
| 102903 |
+
"step": 14699
|
| 102904 |
+
},
|
| 102905 |
+
{
|
| 102906 |
+
"epoch": 0.8166666666666667,
|
| 102907 |
+
"grad_norm": 0.10009765625,
|
| 102908 |
+
"learning_rate": 6.715512311080079e-05,
|
| 102909 |
+
"loss": 2.3657,
|
| 102910 |
+
"step": 14700
|
| 102911 |
}
|
| 102912 |
],
|
| 102913 |
"logging_steps": 1,
|
|
|
|
| 102927 |
"attributes": {}
|
| 102928 |
}
|
| 102929 |
},
|
| 102930 |
+
"total_flos": 7.755903518420334e+18,
|
| 102931 |
"train_batch_size": 8,
|
| 102932 |
"trial_name": null,
|
| 102933 |
"trial_params": null
|