Training in progress, step 15300, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 340808816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1257662d5e7a33433cc515ab20624ece675c9decd2c57dd720e2c080b3b5d1f5
|
| 3 |
size 340808816
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 173247691
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92f0d990cc98602052f072da60717a4200328a42e49f807ab284cf8c86b12b59
|
| 3 |
size 173247691
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00af62b60f684e0ae8e5e4a0958515fde1928c7960e1e4baba4933a4cb10355d
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 17.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -105008,6 +105008,2106 @@
|
|
| 105008 |
"learning_rate": 2.7938828357560397e-06,
|
| 105009 |
"loss": 0.5363,
|
| 105010 |
"step": 15000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105011 |
}
|
| 105012 |
],
|
| 105013 |
"logging_steps": 1,
|
|
@@ -105027,7 +107127,7 @@
|
|
| 105027 |
"attributes": {}
|
| 105028 |
}
|
| 105029 |
},
|
| 105030 |
-
"total_flos": 8.
|
| 105031 |
"train_batch_size": 8,
|
| 105032 |
"trial_name": null,
|
| 105033 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 17.426210826210827,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 15300,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 105008 |
"learning_rate": 2.7938828357560397e-06,
|
| 105009 |
"loss": 0.5363,
|
| 105010 |
"step": 15000
|
| 105011 |
+
},
|
| 105012 |
+
{
|
| 105013 |
+
"epoch": 17.085470085470085,
|
| 105014 |
+
"grad_norm": 0.2287999540567398,
|
| 105015 |
+
"learning_rate": 2.7917429304309127e-06,
|
| 105016 |
+
"loss": 0.5335,
|
| 105017 |
+
"step": 15001
|
| 105018 |
+
},
|
| 105019 |
+
{
|
| 105020 |
+
"epoch": 17.086609686609687,
|
| 105021 |
+
"grad_norm": 0.18001307547092438,
|
| 105022 |
+
"learning_rate": 2.7896037964596333e-06,
|
| 105023 |
+
"loss": 0.6861,
|
| 105024 |
+
"step": 15002
|
| 105025 |
+
},
|
| 105026 |
+
{
|
| 105027 |
+
"epoch": 17.087749287749286,
|
| 105028 |
+
"grad_norm": 0.19185101985931396,
|
| 105029 |
+
"learning_rate": 2.787465433916486e-06,
|
| 105030 |
+
"loss": 0.661,
|
| 105031 |
+
"step": 15003
|
| 105032 |
+
},
|
| 105033 |
+
{
|
| 105034 |
+
"epoch": 17.08888888888889,
|
| 105035 |
+
"grad_norm": 0.273325115442276,
|
| 105036 |
+
"learning_rate": 2.7853278428757455e-06,
|
| 105037 |
+
"loss": 0.3503,
|
| 105038 |
+
"step": 15004
|
| 105039 |
+
},
|
| 105040 |
+
{
|
| 105041 |
+
"epoch": 17.09002849002849,
|
| 105042 |
+
"grad_norm": 0.16111336648464203,
|
| 105043 |
+
"learning_rate": 2.7831910234116716e-06,
|
| 105044 |
+
"loss": 0.6399,
|
| 105045 |
+
"step": 15005
|
| 105046 |
+
},
|
| 105047 |
+
{
|
| 105048 |
+
"epoch": 17.09116809116809,
|
| 105049 |
+
"grad_norm": 0.2116054743528366,
|
| 105050 |
+
"learning_rate": 2.781054975598463e-06,
|
| 105051 |
+
"loss": 0.7563,
|
| 105052 |
+
"step": 15006
|
| 105053 |
+
},
|
| 105054 |
+
{
|
| 105055 |
+
"epoch": 17.092307692307692,
|
| 105056 |
+
"grad_norm": 0.17487739026546478,
|
| 105057 |
+
"learning_rate": 2.7789196995103155e-06,
|
| 105058 |
+
"loss": 0.6322,
|
| 105059 |
+
"step": 15007
|
| 105060 |
+
},
|
| 105061 |
+
{
|
| 105062 |
+
"epoch": 17.093447293447294,
|
| 105063 |
+
"grad_norm": 0.19562922418117523,
|
| 105064 |
+
"learning_rate": 2.7767851952213988e-06,
|
| 105065 |
+
"loss": 0.4521,
|
| 105066 |
+
"step": 15008
|
| 105067 |
+
},
|
| 105068 |
+
{
|
| 105069 |
+
"epoch": 17.094586894586893,
|
| 105070 |
+
"grad_norm": 0.18290062248706818,
|
| 105071 |
+
"learning_rate": 2.7746514628058466e-06,
|
| 105072 |
+
"loss": 0.6395,
|
| 105073 |
+
"step": 15009
|
| 105074 |
+
},
|
| 105075 |
+
{
|
| 105076 |
+
"epoch": 17.095726495726495,
|
| 105077 |
+
"grad_norm": 0.18277134001255035,
|
| 105078 |
+
"learning_rate": 2.7725185023377676e-06,
|
| 105079 |
+
"loss": 0.4365,
|
| 105080 |
+
"step": 15010
|
| 105081 |
+
},
|
| 105082 |
+
{
|
| 105083 |
+
"epoch": 17.096866096866098,
|
| 105084 |
+
"grad_norm": 0.20527899265289307,
|
| 105085 |
+
"learning_rate": 2.770386313891246e-06,
|
| 105086 |
+
"loss": 0.4795,
|
| 105087 |
+
"step": 15011
|
| 105088 |
+
},
|
| 105089 |
+
{
|
| 105090 |
+
"epoch": 17.098005698005696,
|
| 105091 |
+
"grad_norm": 0.24203483760356903,
|
| 105092 |
+
"learning_rate": 2.76825489754034e-06,
|
| 105093 |
+
"loss": 0.4322,
|
| 105094 |
+
"step": 15012
|
| 105095 |
+
},
|
| 105096 |
+
{
|
| 105097 |
+
"epoch": 17.0991452991453,
|
| 105098 |
+
"grad_norm": 1.090076208114624,
|
| 105099 |
+
"learning_rate": 2.7661242533590842e-06,
|
| 105100 |
+
"loss": 0.744,
|
| 105101 |
+
"step": 15013
|
| 105102 |
+
},
|
| 105103 |
+
{
|
| 105104 |
+
"epoch": 17.1002849002849,
|
| 105105 |
+
"grad_norm": 0.2109411507844925,
|
| 105106 |
+
"learning_rate": 2.7639943814214696e-06,
|
| 105107 |
+
"loss": 0.4425,
|
| 105108 |
+
"step": 15014
|
| 105109 |
+
},
|
| 105110 |
+
{
|
| 105111 |
+
"epoch": 17.1014245014245,
|
| 105112 |
+
"grad_norm": 0.27713099122047424,
|
| 105113 |
+
"learning_rate": 2.761865281801476e-06,
|
| 105114 |
+
"loss": 0.5374,
|
| 105115 |
+
"step": 15015
|
| 105116 |
+
},
|
| 105117 |
+
{
|
| 105118 |
+
"epoch": 17.102564102564102,
|
| 105119 |
+
"grad_norm": 0.24252092838287354,
|
| 105120 |
+
"learning_rate": 2.759736954573064e-06,
|
| 105121 |
+
"loss": 0.5649,
|
| 105122 |
+
"step": 15016
|
| 105123 |
+
},
|
| 105124 |
+
{
|
| 105125 |
+
"epoch": 17.103703703703705,
|
| 105126 |
+
"grad_norm": 0.2273343801498413,
|
| 105127 |
+
"learning_rate": 2.757609399810146e-06,
|
| 105128 |
+
"loss": 0.7687,
|
| 105129 |
+
"step": 15017
|
| 105130 |
+
},
|
| 105131 |
+
{
|
| 105132 |
+
"epoch": 17.104843304843303,
|
| 105133 |
+
"grad_norm": 0.21032743155956268,
|
| 105134 |
+
"learning_rate": 2.7554826175866187e-06,
|
| 105135 |
+
"loss": 0.7519,
|
| 105136 |
+
"step": 15018
|
| 105137 |
+
},
|
| 105138 |
+
{
|
| 105139 |
+
"epoch": 17.105982905982906,
|
| 105140 |
+
"grad_norm": 0.22888341546058655,
|
| 105141 |
+
"learning_rate": 2.753356607976354e-06,
|
| 105142 |
+
"loss": 0.6021,
|
| 105143 |
+
"step": 15019
|
| 105144 |
+
},
|
| 105145 |
+
{
|
| 105146 |
+
"epoch": 17.107122507122508,
|
| 105147 |
+
"grad_norm": 0.16688553988933563,
|
| 105148 |
+
"learning_rate": 2.7512313710531924e-06,
|
| 105149 |
+
"loss": 0.8072,
|
| 105150 |
+
"step": 15020
|
| 105151 |
+
},
|
| 105152 |
+
{
|
| 105153 |
+
"epoch": 17.108262108262107,
|
| 105154 |
+
"grad_norm": 0.18631112575531006,
|
| 105155 |
+
"learning_rate": 2.7491069068909553e-06,
|
| 105156 |
+
"loss": 0.7823,
|
| 105157 |
+
"step": 15021
|
| 105158 |
+
},
|
| 105159 |
+
{
|
| 105160 |
+
"epoch": 17.10940170940171,
|
| 105161 |
+
"grad_norm": 0.17442795634269714,
|
| 105162 |
+
"learning_rate": 2.74698321556342e-06,
|
| 105163 |
+
"loss": 0.7536,
|
| 105164 |
+
"step": 15022
|
| 105165 |
+
},
|
| 105166 |
+
{
|
| 105167 |
+
"epoch": 17.11054131054131,
|
| 105168 |
+
"grad_norm": 0.21520088613033295,
|
| 105169 |
+
"learning_rate": 2.7448602971443527e-06,
|
| 105170 |
+
"loss": 0.4669,
|
| 105171 |
+
"step": 15023
|
| 105172 |
+
},
|
| 105173 |
+
{
|
| 105174 |
+
"epoch": 17.11168091168091,
|
| 105175 |
+
"grad_norm": 0.2754962742328644,
|
| 105176 |
+
"learning_rate": 2.742738151707491e-06,
|
| 105177 |
+
"loss": 0.591,
|
| 105178 |
+
"step": 15024
|
| 105179 |
+
},
|
| 105180 |
+
{
|
| 105181 |
+
"epoch": 17.112820512820512,
|
| 105182 |
+
"grad_norm": 0.216432586312294,
|
| 105183 |
+
"learning_rate": 2.7406167793265465e-06,
|
| 105184 |
+
"loss": 0.6418,
|
| 105185 |
+
"step": 15025
|
| 105186 |
+
},
|
| 105187 |
+
{
|
| 105188 |
+
"epoch": 17.113960113960115,
|
| 105189 |
+
"grad_norm": 0.18414662778377533,
|
| 105190 |
+
"learning_rate": 2.738496180075181e-06,
|
| 105191 |
+
"loss": 0.8555,
|
| 105192 |
+
"step": 15026
|
| 105193 |
+
},
|
| 105194 |
+
{
|
| 105195 |
+
"epoch": 17.115099715099714,
|
| 105196 |
+
"grad_norm": 0.18145786225795746,
|
| 105197 |
+
"learning_rate": 2.7363763540270705e-06,
|
| 105198 |
+
"loss": 0.5076,
|
| 105199 |
+
"step": 15027
|
| 105200 |
+
},
|
| 105201 |
+
{
|
| 105202 |
+
"epoch": 17.116239316239316,
|
| 105203 |
+
"grad_norm": 0.1736234575510025,
|
| 105204 |
+
"learning_rate": 2.734257301255838e-06,
|
| 105205 |
+
"loss": 0.8125,
|
| 105206 |
+
"step": 15028
|
| 105207 |
+
},
|
| 105208 |
+
{
|
| 105209 |
+
"epoch": 17.117378917378918,
|
| 105210 |
+
"grad_norm": 0.163956880569458,
|
| 105211 |
+
"learning_rate": 2.7321390218350754e-06,
|
| 105212 |
+
"loss": 0.7355,
|
| 105213 |
+
"step": 15029
|
| 105214 |
+
},
|
| 105215 |
+
{
|
| 105216 |
+
"epoch": 17.118518518518517,
|
| 105217 |
+
"grad_norm": 0.1710730493068695,
|
| 105218 |
+
"learning_rate": 2.730021515838363e-06,
|
| 105219 |
+
"loss": 0.7979,
|
| 105220 |
+
"step": 15030
|
| 105221 |
+
},
|
| 105222 |
+
{
|
| 105223 |
+
"epoch": 17.11965811965812,
|
| 105224 |
+
"grad_norm": 0.20885802805423737,
|
| 105225 |
+
"learning_rate": 2.7279047833392467e-06,
|
| 105226 |
+
"loss": 0.7053,
|
| 105227 |
+
"step": 15031
|
| 105228 |
+
},
|
| 105229 |
+
{
|
| 105230 |
+
"epoch": 17.12079772079772,
|
| 105231 |
+
"grad_norm": 0.18835724890232086,
|
| 105232 |
+
"learning_rate": 2.725788824411249e-06,
|
| 105233 |
+
"loss": 0.5905,
|
| 105234 |
+
"step": 15032
|
| 105235 |
+
},
|
| 105236 |
+
{
|
| 105237 |
+
"epoch": 17.12193732193732,
|
| 105238 |
+
"grad_norm": 0.19526271522045135,
|
| 105239 |
+
"learning_rate": 2.723673639127855e-06,
|
| 105240 |
+
"loss": 0.5703,
|
| 105241 |
+
"step": 15033
|
| 105242 |
+
},
|
| 105243 |
+
{
|
| 105244 |
+
"epoch": 17.123076923076923,
|
| 105245 |
+
"grad_norm": 0.17180439829826355,
|
| 105246 |
+
"learning_rate": 2.721559227562537e-06,
|
| 105247 |
+
"loss": 0.7765,
|
| 105248 |
+
"step": 15034
|
| 105249 |
+
},
|
| 105250 |
+
{
|
| 105251 |
+
"epoch": 17.124216524216525,
|
| 105252 |
+
"grad_norm": 0.24739310145378113,
|
| 105253 |
+
"learning_rate": 2.71944558978873e-06,
|
| 105254 |
+
"loss": 0.5759,
|
| 105255 |
+
"step": 15035
|
| 105256 |
+
},
|
| 105257 |
+
{
|
| 105258 |
+
"epoch": 17.125356125356124,
|
| 105259 |
+
"grad_norm": 0.18169115483760834,
|
| 105260 |
+
"learning_rate": 2.7173327258798566e-06,
|
| 105261 |
+
"loss": 0.7963,
|
| 105262 |
+
"step": 15036
|
| 105263 |
+
},
|
| 105264 |
+
{
|
| 105265 |
+
"epoch": 17.126495726495726,
|
| 105266 |
+
"grad_norm": 0.16554099321365356,
|
| 105267 |
+
"learning_rate": 2.715220635909285e-06,
|
| 105268 |
+
"loss": 0.5819,
|
| 105269 |
+
"step": 15037
|
| 105270 |
+
},
|
| 105271 |
+
{
|
| 105272 |
+
"epoch": 17.12763532763533,
|
| 105273 |
+
"grad_norm": 0.2766818106174469,
|
| 105274 |
+
"learning_rate": 2.7131093199503883e-06,
|
| 105275 |
+
"loss": 0.417,
|
| 105276 |
+
"step": 15038
|
| 105277 |
+
},
|
| 105278 |
+
{
|
| 105279 |
+
"epoch": 17.128774928774927,
|
| 105280 |
+
"grad_norm": 0.20147456228733063,
|
| 105281 |
+
"learning_rate": 2.7109987780764985e-06,
|
| 105282 |
+
"loss": 0.5501,
|
| 105283 |
+
"step": 15039
|
| 105284 |
+
},
|
| 105285 |
+
{
|
| 105286 |
+
"epoch": 17.12991452991453,
|
| 105287 |
+
"grad_norm": 0.16212566196918488,
|
| 105288 |
+
"learning_rate": 2.708889010360913e-06,
|
| 105289 |
+
"loss": 0.6883,
|
| 105290 |
+
"step": 15040
|
| 105291 |
+
},
|
| 105292 |
+
{
|
| 105293 |
+
"epoch": 17.13105413105413,
|
| 105294 |
+
"grad_norm": 0.16984368860721588,
|
| 105295 |
+
"learning_rate": 2.7067800168769116e-06,
|
| 105296 |
+
"loss": 0.6953,
|
| 105297 |
+
"step": 15041
|
| 105298 |
+
},
|
| 105299 |
+
{
|
| 105300 |
+
"epoch": 17.13219373219373,
|
| 105301 |
+
"grad_norm": 0.22705335915088654,
|
| 105302 |
+
"learning_rate": 2.704671797697747e-06,
|
| 105303 |
+
"loss": 0.6529,
|
| 105304 |
+
"step": 15042
|
| 105305 |
+
},
|
| 105306 |
+
{
|
| 105307 |
+
"epoch": 17.133333333333333,
|
| 105308 |
+
"grad_norm": 0.1955214887857437,
|
| 105309 |
+
"learning_rate": 2.70256435289665e-06,
|
| 105310 |
+
"loss": 0.536,
|
| 105311 |
+
"step": 15043
|
| 105312 |
+
},
|
| 105313 |
+
{
|
| 105314 |
+
"epoch": 17.134472934472935,
|
| 105315 |
+
"grad_norm": 0.21065659821033478,
|
| 105316 |
+
"learning_rate": 2.7004576825468058e-06,
|
| 105317 |
+
"loss": 0.5179,
|
| 105318 |
+
"step": 15044
|
| 105319 |
+
},
|
| 105320 |
+
{
|
| 105321 |
+
"epoch": 17.135612535612534,
|
| 105322 |
+
"grad_norm": 0.22755803167819977,
|
| 105323 |
+
"learning_rate": 2.698351786721387e-06,
|
| 105324 |
+
"loss": 0.4612,
|
| 105325 |
+
"step": 15045
|
| 105326 |
+
},
|
| 105327 |
+
{
|
| 105328 |
+
"epoch": 17.136752136752136,
|
| 105329 |
+
"grad_norm": 0.21350692212581635,
|
| 105330 |
+
"learning_rate": 2.696246665493543e-06,
|
| 105331 |
+
"loss": 0.6052,
|
| 105332 |
+
"step": 15046
|
| 105333 |
+
},
|
| 105334 |
+
{
|
| 105335 |
+
"epoch": 17.13789173789174,
|
| 105336 |
+
"grad_norm": 0.20846883952617645,
|
| 105337 |
+
"learning_rate": 2.6941423189363934e-06,
|
| 105338 |
+
"loss": 0.8051,
|
| 105339 |
+
"step": 15047
|
| 105340 |
+
},
|
| 105341 |
+
{
|
| 105342 |
+
"epoch": 17.139031339031337,
|
| 105343 |
+
"grad_norm": 0.15907186269760132,
|
| 105344 |
+
"learning_rate": 2.6920387471230128e-06,
|
| 105345 |
+
"loss": 0.6744,
|
| 105346 |
+
"step": 15048
|
| 105347 |
+
},
|
| 105348 |
+
{
|
| 105349 |
+
"epoch": 17.14017094017094,
|
| 105350 |
+
"grad_norm": 0.1589837670326233,
|
| 105351 |
+
"learning_rate": 2.6899359501264765e-06,
|
| 105352 |
+
"loss": 0.5029,
|
| 105353 |
+
"step": 15049
|
| 105354 |
+
},
|
| 105355 |
+
{
|
| 105356 |
+
"epoch": 17.141310541310542,
|
| 105357 |
+
"grad_norm": 0.20351219177246094,
|
| 105358 |
+
"learning_rate": 2.687833928019823e-06,
|
| 105359 |
+
"loss": 0.6728,
|
| 105360 |
+
"step": 15050
|
| 105361 |
+
},
|
| 105362 |
+
{
|
| 105363 |
+
"epoch": 17.14245014245014,
|
| 105364 |
+
"grad_norm": 0.2065182328224182,
|
| 105365 |
+
"learning_rate": 2.685732680876052e-06,
|
| 105366 |
+
"loss": 0.496,
|
| 105367 |
+
"step": 15051
|
| 105368 |
+
},
|
| 105369 |
+
{
|
| 105370 |
+
"epoch": 17.143589743589743,
|
| 105371 |
+
"grad_norm": 0.16153362393379211,
|
| 105372 |
+
"learning_rate": 2.68363220876815e-06,
|
| 105373 |
+
"loss": 0.952,
|
| 105374 |
+
"step": 15052
|
| 105375 |
+
},
|
| 105376 |
+
{
|
| 105377 |
+
"epoch": 17.144729344729345,
|
| 105378 |
+
"grad_norm": 0.2070079892873764,
|
| 105379 |
+
"learning_rate": 2.6815325117690694e-06,
|
| 105380 |
+
"loss": 0.6689,
|
| 105381 |
+
"step": 15053
|
| 105382 |
+
},
|
| 105383 |
+
{
|
| 105384 |
+
"epoch": 17.145868945868944,
|
| 105385 |
+
"grad_norm": 0.18489223718643188,
|
| 105386 |
+
"learning_rate": 2.6794335899517464e-06,
|
| 105387 |
+
"loss": 0.665,
|
| 105388 |
+
"step": 15054
|
| 105389 |
+
},
|
| 105390 |
+
{
|
| 105391 |
+
"epoch": 17.147008547008546,
|
| 105392 |
+
"grad_norm": 0.21992330253124237,
|
| 105393 |
+
"learning_rate": 2.677335443389073e-06,
|
| 105394 |
+
"loss": 0.7136,
|
| 105395 |
+
"step": 15055
|
| 105396 |
+
},
|
| 105397 |
+
{
|
| 105398 |
+
"epoch": 17.14814814814815,
|
| 105399 |
+
"grad_norm": 0.19489827752113342,
|
| 105400 |
+
"learning_rate": 2.6752380721539265e-06,
|
| 105401 |
+
"loss": 0.814,
|
| 105402 |
+
"step": 15056
|
| 105403 |
+
},
|
| 105404 |
+
{
|
| 105405 |
+
"epoch": 17.149287749287748,
|
| 105406 |
+
"grad_norm": 0.20492392778396606,
|
| 105407 |
+
"learning_rate": 2.673141476319155e-06,
|
| 105408 |
+
"loss": 0.5362,
|
| 105409 |
+
"step": 15057
|
| 105410 |
+
},
|
| 105411 |
+
{
|
| 105412 |
+
"epoch": 17.15042735042735,
|
| 105413 |
+
"grad_norm": 0.15961942076683044,
|
| 105414 |
+
"learning_rate": 2.6710456559575827e-06,
|
| 105415 |
+
"loss": 0.7438,
|
| 105416 |
+
"step": 15058
|
| 105417 |
+
},
|
| 105418 |
+
{
|
| 105419 |
+
"epoch": 17.151566951566952,
|
| 105420 |
+
"grad_norm": 0.21485158801078796,
|
| 105421 |
+
"learning_rate": 2.6689506111419905e-06,
|
| 105422 |
+
"loss": 0.6132,
|
| 105423 |
+
"step": 15059
|
| 105424 |
+
},
|
| 105425 |
+
{
|
| 105426 |
+
"epoch": 17.15270655270655,
|
| 105427 |
+
"grad_norm": 0.21094417572021484,
|
| 105428 |
+
"learning_rate": 2.6668563419451593e-06,
|
| 105429 |
+
"loss": 0.6229,
|
| 105430 |
+
"step": 15060
|
| 105431 |
+
},
|
| 105432 |
+
{
|
| 105433 |
+
"epoch": 17.153846153846153,
|
| 105434 |
+
"grad_norm": 0.18798595666885376,
|
| 105435 |
+
"learning_rate": 2.664762848439825e-06,
|
| 105436 |
+
"loss": 0.6037,
|
| 105437 |
+
"step": 15061
|
| 105438 |
+
},
|
| 105439 |
+
{
|
| 105440 |
+
"epoch": 17.154985754985756,
|
| 105441 |
+
"grad_norm": 0.2002788484096527,
|
| 105442 |
+
"learning_rate": 2.6626701306987024e-06,
|
| 105443 |
+
"loss": 0.672,
|
| 105444 |
+
"step": 15062
|
| 105445 |
+
},
|
| 105446 |
+
{
|
| 105447 |
+
"epoch": 17.156125356125354,
|
| 105448 |
+
"grad_norm": 0.18735022842884064,
|
| 105449 |
+
"learning_rate": 2.6605781887944712e-06,
|
| 105450 |
+
"loss": 0.6756,
|
| 105451 |
+
"step": 15063
|
| 105452 |
+
},
|
| 105453 |
+
{
|
| 105454 |
+
"epoch": 17.157264957264957,
|
| 105455 |
+
"grad_norm": 0.22012650966644287,
|
| 105456 |
+
"learning_rate": 2.658487022799791e-06,
|
| 105457 |
+
"loss": 0.6063,
|
| 105458 |
+
"step": 15064
|
| 105459 |
+
},
|
| 105460 |
+
{
|
| 105461 |
+
"epoch": 17.15840455840456,
|
| 105462 |
+
"grad_norm": 0.18836379051208496,
|
| 105463 |
+
"learning_rate": 2.6563966327872974e-06,
|
| 105464 |
+
"loss": 0.6306,
|
| 105465 |
+
"step": 15065
|
| 105466 |
+
},
|
| 105467 |
+
{
|
| 105468 |
+
"epoch": 17.159544159544158,
|
| 105469 |
+
"grad_norm": 0.18315967917442322,
|
| 105470 |
+
"learning_rate": 2.6543070188295963e-06,
|
| 105471 |
+
"loss": 0.7899,
|
| 105472 |
+
"step": 15066
|
| 105473 |
+
},
|
| 105474 |
+
{
|
| 105475 |
+
"epoch": 17.16068376068376,
|
| 105476 |
+
"grad_norm": 0.27469968795776367,
|
| 105477 |
+
"learning_rate": 2.652218180999261e-06,
|
| 105478 |
+
"loss": 0.5203,
|
| 105479 |
+
"step": 15067
|
| 105480 |
+
},
|
| 105481 |
+
{
|
| 105482 |
+
"epoch": 17.161823361823362,
|
| 105483 |
+
"grad_norm": 0.27029624581336975,
|
| 105484 |
+
"learning_rate": 2.6501301193688434e-06,
|
| 105485 |
+
"loss": 0.4631,
|
| 105486 |
+
"step": 15068
|
| 105487 |
+
},
|
| 105488 |
+
{
|
| 105489 |
+
"epoch": 17.162962962962965,
|
| 105490 |
+
"grad_norm": 0.21950866281986237,
|
| 105491 |
+
"learning_rate": 2.648042834010869e-06,
|
| 105492 |
+
"loss": 0.7517,
|
| 105493 |
+
"step": 15069
|
| 105494 |
+
},
|
| 105495 |
+
{
|
| 105496 |
+
"epoch": 17.164102564102564,
|
| 105497 |
+
"grad_norm": 0.1846713274717331,
|
| 105498 |
+
"learning_rate": 2.6459563249978418e-06,
|
| 105499 |
+
"loss": 0.6662,
|
| 105500 |
+
"step": 15070
|
| 105501 |
+
},
|
| 105502 |
+
{
|
| 105503 |
+
"epoch": 17.165242165242166,
|
| 105504 |
+
"grad_norm": 0.1970273107290268,
|
| 105505 |
+
"learning_rate": 2.6438705924022143e-06,
|
| 105506 |
+
"loss": 0.5449,
|
| 105507 |
+
"step": 15071
|
| 105508 |
+
},
|
| 105509 |
+
{
|
| 105510 |
+
"epoch": 17.166381766381768,
|
| 105511 |
+
"grad_norm": 0.2726972997188568,
|
| 105512 |
+
"learning_rate": 2.6417856362964457e-06,
|
| 105513 |
+
"loss": 0.5926,
|
| 105514 |
+
"step": 15072
|
| 105515 |
+
},
|
| 105516 |
+
{
|
| 105517 |
+
"epoch": 17.167521367521367,
|
| 105518 |
+
"grad_norm": 0.24256399273872375,
|
| 105519 |
+
"learning_rate": 2.6397014567529523e-06,
|
| 105520 |
+
"loss": 0.5511,
|
| 105521 |
+
"step": 15073
|
| 105522 |
+
},
|
| 105523 |
+
{
|
| 105524 |
+
"epoch": 17.16866096866097,
|
| 105525 |
+
"grad_norm": 0.1722864955663681,
|
| 105526 |
+
"learning_rate": 2.6376180538441155e-06,
|
| 105527 |
+
"loss": 0.7555,
|
| 105528 |
+
"step": 15074
|
| 105529 |
+
},
|
| 105530 |
+
{
|
| 105531 |
+
"epoch": 17.16980056980057,
|
| 105532 |
+
"grad_norm": 0.19845432043075562,
|
| 105533 |
+
"learning_rate": 2.6355354276422965e-06,
|
| 105534 |
+
"loss": 0.6709,
|
| 105535 |
+
"step": 15075
|
| 105536 |
+
},
|
| 105537 |
+
{
|
| 105538 |
+
"epoch": 17.17094017094017,
|
| 105539 |
+
"grad_norm": 0.24148957431316376,
|
| 105540 |
+
"learning_rate": 2.6334535782198383e-06,
|
| 105541 |
+
"loss": 0.8372,
|
| 105542 |
+
"step": 15076
|
| 105543 |
+
},
|
| 105544 |
+
{
|
| 105545 |
+
"epoch": 17.172079772079773,
|
| 105546 |
+
"grad_norm": 0.18178880214691162,
|
| 105547 |
+
"learning_rate": 2.6313725056490462e-06,
|
| 105548 |
+
"loss": 0.6222,
|
| 105549 |
+
"step": 15077
|
| 105550 |
+
},
|
| 105551 |
+
{
|
| 105552 |
+
"epoch": 17.173219373219375,
|
| 105553 |
+
"grad_norm": 0.19029007852077484,
|
| 105554 |
+
"learning_rate": 2.6292922100021988e-06,
|
| 105555 |
+
"loss": 0.6022,
|
| 105556 |
+
"step": 15078
|
| 105557 |
+
},
|
| 105558 |
+
{
|
| 105559 |
+
"epoch": 17.174358974358974,
|
| 105560 |
+
"grad_norm": 0.23001739382743835,
|
| 105561 |
+
"learning_rate": 2.627212691351555e-06,
|
| 105562 |
+
"loss": 0.7335,
|
| 105563 |
+
"step": 15079
|
| 105564 |
+
},
|
| 105565 |
+
{
|
| 105566 |
+
"epoch": 17.175498575498576,
|
| 105567 |
+
"grad_norm": 0.1962716430425644,
|
| 105568 |
+
"learning_rate": 2.6251339497693345e-06,
|
| 105569 |
+
"loss": 0.6243,
|
| 105570 |
+
"step": 15080
|
| 105571 |
+
},
|
| 105572 |
+
{
|
| 105573 |
+
"epoch": 17.17663817663818,
|
| 105574 |
+
"grad_norm": 0.19968239963054657,
|
| 105575 |
+
"learning_rate": 2.623055985327752e-06,
|
| 105576 |
+
"loss": 0.735,
|
| 105577 |
+
"step": 15081
|
| 105578 |
+
},
|
| 105579 |
+
{
|
| 105580 |
+
"epoch": 17.177777777777777,
|
| 105581 |
+
"grad_norm": 0.1714327186346054,
|
| 105582 |
+
"learning_rate": 2.620978798098961e-06,
|
| 105583 |
+
"loss": 0.6554,
|
| 105584 |
+
"step": 15082
|
| 105585 |
+
},
|
| 105586 |
+
{
|
| 105587 |
+
"epoch": 17.17891737891738,
|
| 105588 |
+
"grad_norm": 0.3115695118904114,
|
| 105589 |
+
"learning_rate": 2.618902388155123e-06,
|
| 105590 |
+
"loss": 0.377,
|
| 105591 |
+
"step": 15083
|
| 105592 |
+
},
|
| 105593 |
+
{
|
| 105594 |
+
"epoch": 17.180056980056982,
|
| 105595 |
+
"grad_norm": 0.17888757586479187,
|
| 105596 |
+
"learning_rate": 2.6168267555683577e-06,
|
| 105597 |
+
"loss": 0.6587,
|
| 105598 |
+
"step": 15084
|
| 105599 |
+
},
|
| 105600 |
+
{
|
| 105601 |
+
"epoch": 17.18119658119658,
|
| 105602 |
+
"grad_norm": 0.19751174747943878,
|
| 105603 |
+
"learning_rate": 2.61475190041075e-06,
|
| 105604 |
+
"loss": 0.4875,
|
| 105605 |
+
"step": 15085
|
| 105606 |
+
},
|
| 105607 |
+
{
|
| 105608 |
+
"epoch": 17.182336182336183,
|
| 105609 |
+
"grad_norm": 0.21054057776927948,
|
| 105610 |
+
"learning_rate": 2.61267782275437e-06,
|
| 105611 |
+
"loss": 0.6593,
|
| 105612 |
+
"step": 15086
|
| 105613 |
+
},
|
| 105614 |
+
{
|
| 105615 |
+
"epoch": 17.183475783475785,
|
| 105616 |
+
"grad_norm": 0.25037145614624023,
|
| 105617 |
+
"learning_rate": 2.6106045226712505e-06,
|
| 105618 |
+
"loss": 0.5905,
|
| 105619 |
+
"step": 15087
|
| 105620 |
+
},
|
| 105621 |
+
{
|
| 105622 |
+
"epoch": 17.184615384615384,
|
| 105623 |
+
"grad_norm": 0.19860246777534485,
|
| 105624 |
+
"learning_rate": 2.6085320002334157e-06,
|
| 105625 |
+
"loss": 0.5893,
|
| 105626 |
+
"step": 15088
|
| 105627 |
+
},
|
| 105628 |
+
{
|
| 105629 |
+
"epoch": 17.185754985754986,
|
| 105630 |
+
"grad_norm": 0.17965349555015564,
|
| 105631 |
+
"learning_rate": 2.6064602555128326e-06,
|
| 105632 |
+
"loss": 0.7347,
|
| 105633 |
+
"step": 15089
|
| 105634 |
+
},
|
| 105635 |
+
{
|
| 105636 |
+
"epoch": 17.18689458689459,
|
| 105637 |
+
"grad_norm": 0.16771340370178223,
|
| 105638 |
+
"learning_rate": 2.6043892885814687e-06,
|
| 105639 |
+
"loss": 0.6191,
|
| 105640 |
+
"step": 15090
|
| 105641 |
+
},
|
| 105642 |
+
{
|
| 105643 |
+
"epoch": 17.188034188034187,
|
| 105644 |
+
"grad_norm": 0.17742687463760376,
|
| 105645 |
+
"learning_rate": 2.602319099511255e-06,
|
| 105646 |
+
"loss": 0.5684,
|
| 105647 |
+
"step": 15091
|
| 105648 |
+
},
|
| 105649 |
+
{
|
| 105650 |
+
"epoch": 17.18917378917379,
|
| 105651 |
+
"grad_norm": 0.18416251242160797,
|
| 105652 |
+
"learning_rate": 2.600249688374096e-06,
|
| 105653 |
+
"loss": 0.6517,
|
| 105654 |
+
"step": 15092
|
| 105655 |
+
},
|
| 105656 |
+
{
|
| 105657 |
+
"epoch": 17.190313390313392,
|
| 105658 |
+
"grad_norm": 0.1928284615278244,
|
| 105659 |
+
"learning_rate": 2.598181055241855e-06,
|
| 105660 |
+
"loss": 0.5186,
|
| 105661 |
+
"step": 15093
|
| 105662 |
+
},
|
| 105663 |
+
{
|
| 105664 |
+
"epoch": 17.19145299145299,
|
| 105665 |
+
"grad_norm": 0.16785964369773865,
|
| 105666 |
+
"learning_rate": 2.596113200186395e-06,
|
| 105667 |
+
"loss": 0.6798,
|
| 105668 |
+
"step": 15094
|
| 105669 |
+
},
|
| 105670 |
+
{
|
| 105671 |
+
"epoch": 17.192592592592593,
|
| 105672 |
+
"grad_norm": 0.2011309266090393,
|
| 105673 |
+
"learning_rate": 2.5940461232795422e-06,
|
| 105674 |
+
"loss": 0.782,
|
| 105675 |
+
"step": 15095
|
| 105676 |
+
},
|
| 105677 |
+
{
|
| 105678 |
+
"epoch": 17.193732193732195,
|
| 105679 |
+
"grad_norm": 0.15364626049995422,
|
| 105680 |
+
"learning_rate": 2.5919798245930772e-06,
|
| 105681 |
+
"loss": 0.8462,
|
| 105682 |
+
"step": 15096
|
| 105683 |
+
},
|
| 105684 |
+
{
|
| 105685 |
+
"epoch": 17.194871794871794,
|
| 105686 |
+
"grad_norm": 0.1840306669473648,
|
| 105687 |
+
"learning_rate": 2.589914304198776e-06,
|
| 105688 |
+
"loss": 0.8393,
|
| 105689 |
+
"step": 15097
|
| 105690 |
+
},
|
| 105691 |
+
{
|
| 105692 |
+
"epoch": 17.196011396011396,
|
| 105693 |
+
"grad_norm": 0.21074427664279938,
|
| 105694 |
+
"learning_rate": 2.587849562168379e-06,
|
| 105695 |
+
"loss": 0.5411,
|
| 105696 |
+
"step": 15098
|
| 105697 |
+
},
|
| 105698 |
+
{
|
| 105699 |
+
"epoch": 17.197150997151,
|
| 105700 |
+
"grad_norm": 0.17806614935398102,
|
| 105701 |
+
"learning_rate": 2.585785598573601e-06,
|
| 105702 |
+
"loss": 0.5351,
|
| 105703 |
+
"step": 15099
|
| 105704 |
+
},
|
| 105705 |
+
{
|
| 105706 |
+
"epoch": 17.198290598290598,
|
| 105707 |
+
"grad_norm": 0.2036043405532837,
|
| 105708 |
+
"learning_rate": 2.583722413486131e-06,
|
| 105709 |
+
"loss": 0.8876,
|
| 105710 |
+
"step": 15100
|
| 105711 |
+
},
|
| 105712 |
+
{
|
| 105713 |
+
"epoch": 17.1994301994302,
|
| 105714 |
+
"grad_norm": 0.16868923604488373,
|
| 105715 |
+
"learning_rate": 2.5816600069776236e-06,
|
| 105716 |
+
"loss": 0.8464,
|
| 105717 |
+
"step": 15101
|
| 105718 |
+
},
|
| 105719 |
+
{
|
| 105720 |
+
"epoch": 17.200569800569802,
|
| 105721 |
+
"grad_norm": 0.21217770874500275,
|
| 105722 |
+
"learning_rate": 2.579598379119716e-06,
|
| 105723 |
+
"loss": 0.8388,
|
| 105724 |
+
"step": 15102
|
| 105725 |
+
},
|
| 105726 |
+
{
|
| 105727 |
+
"epoch": 17.2017094017094,
|
| 105728 |
+
"grad_norm": 0.22023366391658783,
|
| 105729 |
+
"learning_rate": 2.5775375299840117e-06,
|
| 105730 |
+
"loss": 0.8549,
|
| 105731 |
+
"step": 15103
|
| 105732 |
+
},
|
| 105733 |
+
{
|
| 105734 |
+
"epoch": 17.202849002849003,
|
| 105735 |
+
"grad_norm": 0.1760704666376114,
|
| 105736 |
+
"learning_rate": 2.5754774596420893e-06,
|
| 105737 |
+
"loss": 0.8507,
|
| 105738 |
+
"step": 15104
|
| 105739 |
+
},
|
| 105740 |
+
{
|
| 105741 |
+
"epoch": 17.203988603988606,
|
| 105742 |
+
"grad_norm": 0.1938440203666687,
|
| 105743 |
+
"learning_rate": 2.5734181681655035e-06,
|
| 105744 |
+
"loss": 0.6966,
|
| 105745 |
+
"step": 15105
|
| 105746 |
+
},
|
| 105747 |
+
{
|
| 105748 |
+
"epoch": 17.205128205128204,
|
| 105749 |
+
"grad_norm": 0.17305469512939453,
|
| 105750 |
+
"learning_rate": 2.57135965562578e-06,
|
| 105751 |
+
"loss": 0.5743,
|
| 105752 |
+
"step": 15106
|
| 105753 |
+
},
|
| 105754 |
+
{
|
| 105755 |
+
"epoch": 17.206267806267807,
|
| 105756 |
+
"grad_norm": 0.22448083758354187,
|
| 105757 |
+
"learning_rate": 2.5693019220944163e-06,
|
| 105758 |
+
"loss": 0.6156,
|
| 105759 |
+
"step": 15107
|
| 105760 |
+
},
|
| 105761 |
+
{
|
| 105762 |
+
"epoch": 17.20740740740741,
|
| 105763 |
+
"grad_norm": 0.262983113527298,
|
| 105764 |
+
"learning_rate": 2.567244967642879e-06,
|
| 105765 |
+
"loss": 0.5224,
|
| 105766 |
+
"step": 15108
|
| 105767 |
+
},
|
| 105768 |
+
{
|
| 105769 |
+
"epoch": 17.208547008547008,
|
| 105770 |
+
"grad_norm": 0.23758886754512787,
|
| 105771 |
+
"learning_rate": 2.5651887923426155e-06,
|
| 105772 |
+
"loss": 0.5305,
|
| 105773 |
+
"step": 15109
|
| 105774 |
+
},
|
| 105775 |
+
{
|
| 105776 |
+
"epoch": 17.20968660968661,
|
| 105777 |
+
"grad_norm": 0.21146656572818756,
|
| 105778 |
+
"learning_rate": 2.5631333962650384e-06,
|
| 105779 |
+
"loss": 0.5773,
|
| 105780 |
+
"step": 15110
|
| 105781 |
+
},
|
| 105782 |
+
{
|
| 105783 |
+
"epoch": 17.210826210826212,
|
| 105784 |
+
"grad_norm": 0.156987264752388,
|
| 105785 |
+
"learning_rate": 2.561078779481546e-06,
|
| 105786 |
+
"loss": 0.6917,
|
| 105787 |
+
"step": 15111
|
| 105788 |
+
},
|
| 105789 |
+
{
|
| 105790 |
+
"epoch": 17.21196581196581,
|
| 105791 |
+
"grad_norm": 0.21438787877559662,
|
| 105792 |
+
"learning_rate": 2.55902494206349e-06,
|
| 105793 |
+
"loss": 0.6482,
|
| 105794 |
+
"step": 15112
|
| 105795 |
+
},
|
| 105796 |
+
{
|
| 105797 |
+
"epoch": 17.213105413105414,
|
| 105798 |
+
"grad_norm": 0.2056760936975479,
|
| 105799 |
+
"learning_rate": 2.55697188408221e-06,
|
| 105800 |
+
"loss": 0.6169,
|
| 105801 |
+
"step": 15113
|
| 105802 |
+
},
|
| 105803 |
+
{
|
| 105804 |
+
"epoch": 17.214245014245016,
|
| 105805 |
+
"grad_norm": 0.19529277086257935,
|
| 105806 |
+
"learning_rate": 2.5549196056090156e-06,
|
| 105807 |
+
"loss": 0.8006,
|
| 105808 |
+
"step": 15114
|
| 105809 |
+
},
|
| 105810 |
+
{
|
| 105811 |
+
"epoch": 17.215384615384615,
|
| 105812 |
+
"grad_norm": 0.23238149285316467,
|
| 105813 |
+
"learning_rate": 2.5528681067151866e-06,
|
| 105814 |
+
"loss": 0.4384,
|
| 105815 |
+
"step": 15115
|
| 105816 |
+
},
|
| 105817 |
+
{
|
| 105818 |
+
"epoch": 17.216524216524217,
|
| 105819 |
+
"grad_norm": 0.15593671798706055,
|
| 105820 |
+
"learning_rate": 2.5508173874719793e-06,
|
| 105821 |
+
"loss": 0.857,
|
| 105822 |
+
"step": 15116
|
| 105823 |
+
},
|
| 105824 |
+
{
|
| 105825 |
+
"epoch": 17.21766381766382,
|
| 105826 |
+
"grad_norm": 0.22833791375160217,
|
| 105827 |
+
"learning_rate": 2.5487674479506202e-06,
|
| 105828 |
+
"loss": 0.5225,
|
| 105829 |
+
"step": 15117
|
| 105830 |
+
},
|
| 105831 |
+
{
|
| 105832 |
+
"epoch": 17.218803418803418,
|
| 105833 |
+
"grad_norm": 0.1787058562040329,
|
| 105834 |
+
"learning_rate": 2.5467182882223135e-06,
|
| 105835 |
+
"loss": 0.7493,
|
| 105836 |
+
"step": 15118
|
| 105837 |
+
},
|
| 105838 |
+
{
|
| 105839 |
+
"epoch": 17.21994301994302,
|
| 105840 |
+
"grad_norm": 0.18769077956676483,
|
| 105841 |
+
"learning_rate": 2.5446699083582213e-06,
|
| 105842 |
+
"loss": 0.7195,
|
| 105843 |
+
"step": 15119
|
| 105844 |
+
},
|
| 105845 |
+
{
|
| 105846 |
+
"epoch": 17.221082621082623,
|
| 105847 |
+
"grad_norm": 0.20116661489009857,
|
| 105848 |
+
"learning_rate": 2.5426223084294957e-06,
|
| 105849 |
+
"loss": 0.5683,
|
| 105850 |
+
"step": 15120
|
| 105851 |
+
},
|
| 105852 |
+
{
|
| 105853 |
+
"epoch": 17.22222222222222,
|
| 105854 |
+
"grad_norm": 0.1935771405696869,
|
| 105855 |
+
"learning_rate": 2.540575488507255e-06,
|
| 105856 |
+
"loss": 0.7803,
|
| 105857 |
+
"step": 15121
|
| 105858 |
+
},
|
| 105859 |
+
{
|
| 105860 |
+
"epoch": 17.223361823361824,
|
| 105861 |
+
"grad_norm": 0.21847252547740936,
|
| 105862 |
+
"learning_rate": 2.5385294486625966e-06,
|
| 105863 |
+
"loss": 0.631,
|
| 105864 |
+
"step": 15122
|
| 105865 |
+
},
|
| 105866 |
+
{
|
| 105867 |
+
"epoch": 17.224501424501426,
|
| 105868 |
+
"grad_norm": 0.16176047921180725,
|
| 105869 |
+
"learning_rate": 2.536484188966576e-06,
|
| 105870 |
+
"loss": 0.8931,
|
| 105871 |
+
"step": 15123
|
| 105872 |
+
},
|
| 105873 |
+
{
|
| 105874 |
+
"epoch": 17.225641025641025,
|
| 105875 |
+
"grad_norm": 0.17873893678188324,
|
| 105876 |
+
"learning_rate": 2.534439709490233e-06,
|
| 105877 |
+
"loss": 0.7149,
|
| 105878 |
+
"step": 15124
|
| 105879 |
+
},
|
| 105880 |
+
{
|
| 105881 |
+
"epoch": 17.226780626780627,
|
| 105882 |
+
"grad_norm": 0.21072816848754883,
|
| 105883 |
+
"learning_rate": 2.53239601030458e-06,
|
| 105884 |
+
"loss": 0.6532,
|
| 105885 |
+
"step": 15125
|
| 105886 |
+
},
|
| 105887 |
+
{
|
| 105888 |
+
"epoch": 17.22792022792023,
|
| 105889 |
+
"grad_norm": 0.17184481024742126,
|
| 105890 |
+
"learning_rate": 2.5303530914806e-06,
|
| 105891 |
+
"loss": 0.2553,
|
| 105892 |
+
"step": 15126
|
| 105893 |
+
},
|
| 105894 |
+
{
|
| 105895 |
+
"epoch": 17.22905982905983,
|
| 105896 |
+
"grad_norm": 0.22857321798801422,
|
| 105897 |
+
"learning_rate": 2.5283109530892496e-06,
|
| 105898 |
+
"loss": 0.5491,
|
| 105899 |
+
"step": 15127
|
| 105900 |
+
},
|
| 105901 |
+
{
|
| 105902 |
+
"epoch": 17.23019943019943,
|
| 105903 |
+
"grad_norm": 0.17435388267040253,
|
| 105904 |
+
"learning_rate": 2.5262695952014556e-06,
|
| 105905 |
+
"loss": 0.7099,
|
| 105906 |
+
"step": 15128
|
| 105907 |
+
},
|
| 105908 |
+
{
|
| 105909 |
+
"epoch": 17.231339031339033,
|
| 105910 |
+
"grad_norm": 0.26842939853668213,
|
| 105911 |
+
"learning_rate": 2.524229017888127e-06,
|
| 105912 |
+
"loss": 0.6365,
|
| 105913 |
+
"step": 15129
|
| 105914 |
+
},
|
| 105915 |
+
{
|
| 105916 |
+
"epoch": 17.23247863247863,
|
| 105917 |
+
"grad_norm": 0.16448654234409332,
|
| 105918 |
+
"learning_rate": 2.522189221220128e-06,
|
| 105919 |
+
"loss": 0.7441,
|
| 105920 |
+
"step": 15130
|
| 105921 |
+
},
|
| 105922 |
+
{
|
| 105923 |
+
"epoch": 17.233618233618234,
|
| 105924 |
+
"grad_norm": 0.1764354705810547,
|
| 105925 |
+
"learning_rate": 2.5201502052683122e-06,
|
| 105926 |
+
"loss": 0.6701,
|
| 105927 |
+
"step": 15131
|
| 105928 |
+
},
|
| 105929 |
+
{
|
| 105930 |
+
"epoch": 17.234757834757836,
|
| 105931 |
+
"grad_norm": 0.2524125874042511,
|
| 105932 |
+
"learning_rate": 2.518111970103498e-06,
|
| 105933 |
+
"loss": 0.6656,
|
| 105934 |
+
"step": 15132
|
| 105935 |
+
},
|
| 105936 |
+
{
|
| 105937 |
+
"epoch": 17.235897435897435,
|
| 105938 |
+
"grad_norm": 0.200727179646492,
|
| 105939 |
+
"learning_rate": 2.516074515796488e-06,
|
| 105940 |
+
"loss": 0.6161,
|
| 105941 |
+
"step": 15133
|
| 105942 |
+
},
|
| 105943 |
+
{
|
| 105944 |
+
"epoch": 17.237037037037037,
|
| 105945 |
+
"grad_norm": 0.16180752217769623,
|
| 105946 |
+
"learning_rate": 2.514037842418035e-06,
|
| 105947 |
+
"loss": 0.7554,
|
| 105948 |
+
"step": 15134
|
| 105949 |
+
},
|
| 105950 |
+
{
|
| 105951 |
+
"epoch": 17.23817663817664,
|
| 105952 |
+
"grad_norm": 0.232011616230011,
|
| 105953 |
+
"learning_rate": 2.512001950038884e-06,
|
| 105954 |
+
"loss": 0.7122,
|
| 105955 |
+
"step": 15135
|
| 105956 |
+
},
|
| 105957 |
+
{
|
| 105958 |
+
"epoch": 17.23931623931624,
|
| 105959 |
+
"grad_norm": 0.6532576084136963,
|
| 105960 |
+
"learning_rate": 2.5099668387297463e-06,
|
| 105961 |
+
"loss": 0.8184,
|
| 105962 |
+
"step": 15136
|
| 105963 |
+
},
|
| 105964 |
+
{
|
| 105965 |
+
"epoch": 17.24045584045584,
|
| 105966 |
+
"grad_norm": 0.18580158054828644,
|
| 105967 |
+
"learning_rate": 2.5079325085613113e-06,
|
| 105968 |
+
"loss": 0.4323,
|
| 105969 |
+
"step": 15137
|
| 105970 |
+
},
|
| 105971 |
+
{
|
| 105972 |
+
"epoch": 17.241595441595443,
|
| 105973 |
+
"grad_norm": 0.182058647274971,
|
| 105974 |
+
"learning_rate": 2.5058989596042303e-06,
|
| 105975 |
+
"loss": 0.6386,
|
| 105976 |
+
"step": 15138
|
| 105977 |
+
},
|
| 105978 |
+
{
|
| 105979 |
+
"epoch": 17.242735042735042,
|
| 105980 |
+
"grad_norm": 0.19445578753948212,
|
| 105981 |
+
"learning_rate": 2.5038661919291385e-06,
|
| 105982 |
+
"loss": 0.777,
|
| 105983 |
+
"step": 15139
|
| 105984 |
+
},
|
| 105985 |
+
{
|
| 105986 |
+
"epoch": 17.243874643874644,
|
| 105987 |
+
"grad_norm": 0.2003912627696991,
|
| 105988 |
+
"learning_rate": 2.50183420560664e-06,
|
| 105989 |
+
"loss": 0.8179,
|
| 105990 |
+
"step": 15140
|
| 105991 |
+
},
|
| 105992 |
+
{
|
| 105993 |
+
"epoch": 17.245014245014247,
|
| 105994 |
+
"grad_norm": 0.16480514407157898,
|
| 105995 |
+
"learning_rate": 2.4998030007073124e-06,
|
| 105996 |
+
"loss": 0.7508,
|
| 105997 |
+
"step": 15141
|
| 105998 |
+
},
|
| 105999 |
+
{
|
| 106000 |
+
"epoch": 17.246153846153845,
|
| 106001 |
+
"grad_norm": 0.16990984976291656,
|
| 106002 |
+
"learning_rate": 2.497772577301699e-06,
|
| 106003 |
+
"loss": 0.816,
|
| 106004 |
+
"step": 15142
|
| 106005 |
+
},
|
| 106006 |
+
{
|
| 106007 |
+
"epoch": 17.247293447293448,
|
| 106008 |
+
"grad_norm": 0.2032969444990158,
|
| 106009 |
+
"learning_rate": 2.495742935460327e-06,
|
| 106010 |
+
"loss": 0.4964,
|
| 106011 |
+
"step": 15143
|
| 106012 |
+
},
|
| 106013 |
+
{
|
| 106014 |
+
"epoch": 17.24843304843305,
|
| 106015 |
+
"grad_norm": 0.2081516683101654,
|
| 106016 |
+
"learning_rate": 2.4937140752536862e-06,
|
| 106017 |
+
"loss": 0.5333,
|
| 106018 |
+
"step": 15144
|
| 106019 |
+
},
|
| 106020 |
+
{
|
| 106021 |
+
"epoch": 17.24957264957265,
|
| 106022 |
+
"grad_norm": 0.18476413190364838,
|
| 106023 |
+
"learning_rate": 2.491685996752255e-06,
|
| 106024 |
+
"loss": 0.6408,
|
| 106025 |
+
"step": 15145
|
| 106026 |
+
},
|
| 106027 |
+
{
|
| 106028 |
+
"epoch": 17.25071225071225,
|
| 106029 |
+
"grad_norm": 0.17959251999855042,
|
| 106030 |
+
"learning_rate": 2.489658700026462e-06,
|
| 106031 |
+
"loss": 0.7507,
|
| 106032 |
+
"step": 15146
|
| 106033 |
+
},
|
| 106034 |
+
{
|
| 106035 |
+
"epoch": 17.251851851851853,
|
| 106036 |
+
"grad_norm": 0.23477263748645782,
|
| 106037 |
+
"learning_rate": 2.4876321851467267e-06,
|
| 106038 |
+
"loss": 0.4408,
|
| 106039 |
+
"step": 15147
|
| 106040 |
+
},
|
| 106041 |
+
{
|
| 106042 |
+
"epoch": 17.252991452991452,
|
| 106043 |
+
"grad_norm": 0.17450331151485443,
|
| 106044 |
+
"learning_rate": 2.485606452183434e-06,
|
| 106045 |
+
"loss": 0.6646,
|
| 106046 |
+
"step": 15148
|
| 106047 |
+
},
|
| 106048 |
+
{
|
| 106049 |
+
"epoch": 17.254131054131054,
|
| 106050 |
+
"grad_norm": 0.2237221598625183,
|
| 106051 |
+
"learning_rate": 2.483581501206944e-06,
|
| 106052 |
+
"loss": 0.6068,
|
| 106053 |
+
"step": 15149
|
| 106054 |
+
},
|
| 106055 |
+
{
|
| 106056 |
+
"epoch": 17.255270655270657,
|
| 106057 |
+
"grad_norm": 0.17771928012371063,
|
| 106058 |
+
"learning_rate": 2.48155733228759e-06,
|
| 106059 |
+
"loss": 0.7018,
|
| 106060 |
+
"step": 15150
|
| 106061 |
+
},
|
| 106062 |
+
{
|
| 106063 |
+
"epoch": 17.256410256410255,
|
| 106064 |
+
"grad_norm": 0.1876230239868164,
|
| 106065 |
+
"learning_rate": 2.479533945495674e-06,
|
| 106066 |
+
"loss": 0.5195,
|
| 106067 |
+
"step": 15151
|
| 106068 |
+
},
|
| 106069 |
+
{
|
| 106070 |
+
"epoch": 17.257549857549858,
|
| 106071 |
+
"grad_norm": 0.24191851913928986,
|
| 106072 |
+
"learning_rate": 2.477511340901481e-06,
|
| 106073 |
+
"loss": 0.3405,
|
| 106074 |
+
"step": 15152
|
| 106075 |
+
},
|
| 106076 |
+
{
|
| 106077 |
+
"epoch": 17.25868945868946,
|
| 106078 |
+
"grad_norm": 0.20344719290733337,
|
| 106079 |
+
"learning_rate": 2.4754895185752524e-06,
|
| 106080 |
+
"loss": 0.758,
|
| 106081 |
+
"step": 15153
|
| 106082 |
+
},
|
| 106083 |
+
{
|
| 106084 |
+
"epoch": 17.25982905982906,
|
| 106085 |
+
"grad_norm": 0.19998767971992493,
|
| 106086 |
+
"learning_rate": 2.4734684785872152e-06,
|
| 106087 |
+
"loss": 0.6802,
|
| 106088 |
+
"step": 15154
|
| 106089 |
+
},
|
| 106090 |
+
{
|
| 106091 |
+
"epoch": 17.26096866096866,
|
| 106092 |
+
"grad_norm": 0.18411147594451904,
|
| 106093 |
+
"learning_rate": 2.471448221007566e-06,
|
| 106094 |
+
"loss": 0.7494,
|
| 106095 |
+
"step": 15155
|
| 106096 |
+
},
|
| 106097 |
+
{
|
| 106098 |
+
"epoch": 17.262108262108264,
|
| 106099 |
+
"grad_norm": 0.2152598649263382,
|
| 106100 |
+
"learning_rate": 2.469428745906477e-06,
|
| 106101 |
+
"loss": 0.7002,
|
| 106102 |
+
"step": 15156
|
| 106103 |
+
},
|
| 106104 |
+
{
|
| 106105 |
+
"epoch": 17.263247863247862,
|
| 106106 |
+
"grad_norm": 0.19655431807041168,
|
| 106107 |
+
"learning_rate": 2.4674100533540854e-06,
|
| 106108 |
+
"loss": 0.6389,
|
| 106109 |
+
"step": 15157
|
| 106110 |
+
},
|
| 106111 |
+
{
|
| 106112 |
+
"epoch": 17.264387464387465,
|
| 106113 |
+
"grad_norm": 0.29243990778923035,
|
| 106114 |
+
"learning_rate": 2.465392143420506e-06,
|
| 106115 |
+
"loss": 0.4292,
|
| 106116 |
+
"step": 15158
|
| 106117 |
+
},
|
| 106118 |
+
{
|
| 106119 |
+
"epoch": 17.265527065527067,
|
| 106120 |
+
"grad_norm": 0.1663278341293335,
|
| 106121 |
+
"learning_rate": 2.463375016175826e-06,
|
| 106122 |
+
"loss": 0.7744,
|
| 106123 |
+
"step": 15159
|
| 106124 |
+
},
|
| 106125 |
+
{
|
| 106126 |
+
"epoch": 17.266666666666666,
|
| 106127 |
+
"grad_norm": 0.18078675866127014,
|
| 106128 |
+
"learning_rate": 2.4613586716901086e-06,
|
| 106129 |
+
"loss": 0.6191,
|
| 106130 |
+
"step": 15160
|
| 106131 |
+
},
|
| 106132 |
+
{
|
| 106133 |
+
"epoch": 17.267806267806268,
|
| 106134 |
+
"grad_norm": 0.18140067160129547,
|
| 106135 |
+
"learning_rate": 2.459343110033385e-06,
|
| 106136 |
+
"loss": 0.8783,
|
| 106137 |
+
"step": 15161
|
| 106138 |
+
},
|
| 106139 |
+
{
|
| 106140 |
+
"epoch": 17.26894586894587,
|
| 106141 |
+
"grad_norm": 0.20100940763950348,
|
| 106142 |
+
"learning_rate": 2.4573283312756657e-06,
|
| 106143 |
+
"loss": 0.7135,
|
| 106144 |
+
"step": 15162
|
| 106145 |
+
},
|
| 106146 |
+
{
|
| 106147 |
+
"epoch": 17.27008547008547,
|
| 106148 |
+
"grad_norm": 0.1684829294681549,
|
| 106149 |
+
"learning_rate": 2.4553143354869275e-06,
|
| 106150 |
+
"loss": 0.6913,
|
| 106151 |
+
"step": 15163
|
| 106152 |
+
},
|
| 106153 |
+
{
|
| 106154 |
+
"epoch": 17.27122507122507,
|
| 106155 |
+
"grad_norm": 0.1947351098060608,
|
| 106156 |
+
"learning_rate": 2.4533011227371172e-06,
|
| 106157 |
+
"loss": 0.6651,
|
| 106158 |
+
"step": 15164
|
| 106159 |
+
},
|
| 106160 |
+
{
|
| 106161 |
+
"epoch": 17.272364672364674,
|
| 106162 |
+
"grad_norm": 0.15982107818126678,
|
| 106163 |
+
"learning_rate": 2.4512886930961625e-06,
|
| 106164 |
+
"loss": 0.7237,
|
| 106165 |
+
"step": 15165
|
| 106166 |
+
},
|
| 106167 |
+
{
|
| 106168 |
+
"epoch": 17.273504273504273,
|
| 106169 |
+
"grad_norm": 0.22549481689929962,
|
| 106170 |
+
"learning_rate": 2.4492770466339605e-06,
|
| 106171 |
+
"loss": 0.7848,
|
| 106172 |
+
"step": 15166
|
| 106173 |
+
},
|
| 106174 |
+
{
|
| 106175 |
+
"epoch": 17.274643874643875,
|
| 106176 |
+
"grad_norm": 0.21762266755104065,
|
| 106177 |
+
"learning_rate": 2.447266183420388e-06,
|
| 106178 |
+
"loss": 0.5598,
|
| 106179 |
+
"step": 15167
|
| 106180 |
+
},
|
| 106181 |
+
{
|
| 106182 |
+
"epoch": 17.275783475783477,
|
| 106183 |
+
"grad_norm": 0.20130868256092072,
|
| 106184 |
+
"learning_rate": 2.445256103525273e-06,
|
| 106185 |
+
"loss": 0.6003,
|
| 106186 |
+
"step": 15168
|
| 106187 |
+
},
|
| 106188 |
+
{
|
| 106189 |
+
"epoch": 17.276923076923076,
|
| 106190 |
+
"grad_norm": 0.21412499248981476,
|
| 106191 |
+
"learning_rate": 2.4432468070184427e-06,
|
| 106192 |
+
"loss": 0.3147,
|
| 106193 |
+
"step": 15169
|
| 106194 |
+
},
|
| 106195 |
+
{
|
| 106196 |
+
"epoch": 17.27806267806268,
|
| 106197 |
+
"grad_norm": 0.19877946376800537,
|
| 106198 |
+
"learning_rate": 2.4412382939696803e-06,
|
| 106199 |
+
"loss": 0.4896,
|
| 106200 |
+
"step": 15170
|
| 106201 |
+
},
|
| 106202 |
+
{
|
| 106203 |
+
"epoch": 17.27920227920228,
|
| 106204 |
+
"grad_norm": 0.1650521606206894,
|
| 106205 |
+
"learning_rate": 2.439230564448747e-06,
|
| 106206 |
+
"loss": 0.6814,
|
| 106207 |
+
"step": 15171
|
| 106208 |
+
},
|
| 106209 |
+
{
|
| 106210 |
+
"epoch": 17.28034188034188,
|
| 106211 |
+
"grad_norm": 0.20785923302173615,
|
| 106212 |
+
"learning_rate": 2.4372236185253807e-06,
|
| 106213 |
+
"loss": 0.6307,
|
| 106214 |
+
"step": 15172
|
| 106215 |
+
},
|
| 106216 |
+
{
|
| 106217 |
+
"epoch": 17.28148148148148,
|
| 106218 |
+
"grad_norm": 0.2041858583688736,
|
| 106219 |
+
"learning_rate": 2.435217456269287e-06,
|
| 106220 |
+
"loss": 0.6081,
|
| 106221 |
+
"step": 15173
|
| 106222 |
+
},
|
| 106223 |
+
{
|
| 106224 |
+
"epoch": 17.282621082621084,
|
| 106225 |
+
"grad_norm": 0.16322332620620728,
|
| 106226 |
+
"learning_rate": 2.4332120777501467e-06,
|
| 106227 |
+
"loss": 0.7391,
|
| 106228 |
+
"step": 15174
|
| 106229 |
+
},
|
| 106230 |
+
{
|
| 106231 |
+
"epoch": 17.283760683760683,
|
| 106232 |
+
"grad_norm": 0.2151019424200058,
|
| 106233 |
+
"learning_rate": 2.4312074830376064e-06,
|
| 106234 |
+
"loss": 0.6587,
|
| 106235 |
+
"step": 15175
|
| 106236 |
+
},
|
| 106237 |
+
{
|
| 106238 |
+
"epoch": 17.284900284900285,
|
| 106239 |
+
"grad_norm": 0.155168816447258,
|
| 106240 |
+
"learning_rate": 2.4292036722012967e-06,
|
| 106241 |
+
"loss": 0.8807,
|
| 106242 |
+
"step": 15176
|
| 106243 |
+
},
|
| 106244 |
+
{
|
| 106245 |
+
"epoch": 17.286039886039887,
|
| 106246 |
+
"grad_norm": 0.24426081776618958,
|
| 106247 |
+
"learning_rate": 2.4272006453108094e-06,
|
| 106248 |
+
"loss": 0.1383,
|
| 106249 |
+
"step": 15177
|
| 106250 |
+
},
|
| 106251 |
+
{
|
| 106252 |
+
"epoch": 17.287179487179486,
|
| 106253 |
+
"grad_norm": 0.22462360560894012,
|
| 106254 |
+
"learning_rate": 2.425198402435722e-06,
|
| 106255 |
+
"loss": 0.4408,
|
| 106256 |
+
"step": 15178
|
| 106257 |
+
},
|
| 106258 |
+
{
|
| 106259 |
+
"epoch": 17.28831908831909,
|
| 106260 |
+
"grad_norm": 0.16213785111904144,
|
| 106261 |
+
"learning_rate": 2.423196943645578e-06,
|
| 106262 |
+
"loss": 0.6757,
|
| 106263 |
+
"step": 15179
|
| 106264 |
+
},
|
| 106265 |
+
{
|
| 106266 |
+
"epoch": 17.28945868945869,
|
| 106267 |
+
"grad_norm": 0.1819656491279602,
|
| 106268 |
+
"learning_rate": 2.421196269009887e-06,
|
| 106269 |
+
"loss": 0.6015,
|
| 106270 |
+
"step": 15180
|
| 106271 |
+
},
|
| 106272 |
+
{
|
| 106273 |
+
"epoch": 17.29059829059829,
|
| 106274 |
+
"grad_norm": 0.17803218960762024,
|
| 106275 |
+
"learning_rate": 2.4191963785981377e-06,
|
| 106276 |
+
"loss": 0.7926,
|
| 106277 |
+
"step": 15181
|
| 106278 |
+
},
|
| 106279 |
+
{
|
| 106280 |
+
"epoch": 17.291737891737892,
|
| 106281 |
+
"grad_norm": 0.17023199796676636,
|
| 106282 |
+
"learning_rate": 2.4171972724798016e-06,
|
| 106283 |
+
"loss": 0.5545,
|
| 106284 |
+
"step": 15182
|
| 106285 |
+
},
|
| 106286 |
+
{
|
| 106287 |
+
"epoch": 17.292877492877494,
|
| 106288 |
+
"grad_norm": 0.18583688139915466,
|
| 106289 |
+
"learning_rate": 2.4151989507243067e-06,
|
| 106290 |
+
"loss": 0.5112,
|
| 106291 |
+
"step": 15183
|
| 106292 |
+
},
|
| 106293 |
+
{
|
| 106294 |
+
"epoch": 17.294017094017093,
|
| 106295 |
+
"grad_norm": 0.18971213698387146,
|
| 106296 |
+
"learning_rate": 2.413201413401059e-06,
|
| 106297 |
+
"loss": 0.8555,
|
| 106298 |
+
"step": 15184
|
| 106299 |
+
},
|
| 106300 |
+
{
|
| 106301 |
+
"epoch": 17.295156695156695,
|
| 106302 |
+
"grad_norm": 0.24449138343334198,
|
| 106303 |
+
"learning_rate": 2.4112046605794386e-06,
|
| 106304 |
+
"loss": 0.5715,
|
| 106305 |
+
"step": 15185
|
| 106306 |
+
},
|
| 106307 |
+
{
|
| 106308 |
+
"epoch": 17.296296296296298,
|
| 106309 |
+
"grad_norm": 0.24402859807014465,
|
| 106310 |
+
"learning_rate": 2.409208692328804e-06,
|
| 106311 |
+
"loss": 0.6896,
|
| 106312 |
+
"step": 15186
|
| 106313 |
+
},
|
| 106314 |
+
{
|
| 106315 |
+
"epoch": 17.297435897435896,
|
| 106316 |
+
"grad_norm": 0.20627640187740326,
|
| 106317 |
+
"learning_rate": 2.4072135087184757e-06,
|
| 106318 |
+
"loss": 0.7722,
|
| 106319 |
+
"step": 15187
|
| 106320 |
+
},
|
| 106321 |
+
{
|
| 106322 |
+
"epoch": 17.2985754985755,
|
| 106323 |
+
"grad_norm": 0.24078132212162018,
|
| 106324 |
+
"learning_rate": 2.4052191098177494e-06,
|
| 106325 |
+
"loss": 0.4887,
|
| 106326 |
+
"step": 15188
|
| 106327 |
+
},
|
| 106328 |
+
{
|
| 106329 |
+
"epoch": 17.2997150997151,
|
| 106330 |
+
"grad_norm": 0.2839394807815552,
|
| 106331 |
+
"learning_rate": 2.4032254956959015e-06,
|
| 106332 |
+
"loss": 0.6151,
|
| 106333 |
+
"step": 15189
|
| 106334 |
+
},
|
| 106335 |
+
{
|
| 106336 |
+
"epoch": 17.3008547008547,
|
| 106337 |
+
"grad_norm": 0.16072551906108856,
|
| 106338 |
+
"learning_rate": 2.401232666422176e-06,
|
| 106339 |
+
"loss": 0.7992,
|
| 106340 |
+
"step": 15190
|
| 106341 |
+
},
|
| 106342 |
+
{
|
| 106343 |
+
"epoch": 17.301994301994302,
|
| 106344 |
+
"grad_norm": 0.22132433950901031,
|
| 106345 |
+
"learning_rate": 2.399240622065782e-06,
|
| 106346 |
+
"loss": 0.5885,
|
| 106347 |
+
"step": 15191
|
| 106348 |
+
},
|
| 106349 |
+
{
|
| 106350 |
+
"epoch": 17.303133903133904,
|
| 106351 |
+
"grad_norm": 0.24359983205795288,
|
| 106352 |
+
"learning_rate": 2.3972493626959106e-06,
|
| 106353 |
+
"loss": 0.6215,
|
| 106354 |
+
"step": 15192
|
| 106355 |
+
},
|
| 106356 |
+
{
|
| 106357 |
+
"epoch": 17.304273504273503,
|
| 106358 |
+
"grad_norm": 0.21864564716815948,
|
| 106359 |
+
"learning_rate": 2.395258888381735e-06,
|
| 106360 |
+
"loss": 0.7649,
|
| 106361 |
+
"step": 15193
|
| 106362 |
+
},
|
| 106363 |
+
{
|
| 106364 |
+
"epoch": 17.305413105413106,
|
| 106365 |
+
"grad_norm": 0.18766537308692932,
|
| 106366 |
+
"learning_rate": 2.393269199192377e-06,
|
| 106367 |
+
"loss": 0.6378,
|
| 106368 |
+
"step": 15194
|
| 106369 |
+
},
|
| 106370 |
+
{
|
| 106371 |
+
"epoch": 17.306552706552708,
|
| 106372 |
+
"grad_norm": 0.2603372633457184,
|
| 106373 |
+
"learning_rate": 2.3912802951969488e-06,
|
| 106374 |
+
"loss": 0.5515,
|
| 106375 |
+
"step": 15195
|
| 106376 |
+
},
|
| 106377 |
+
{
|
| 106378 |
+
"epoch": 17.307692307692307,
|
| 106379 |
+
"grad_norm": 0.2291465848684311,
|
| 106380 |
+
"learning_rate": 2.3892921764645304e-06,
|
| 106381 |
+
"loss": 0.6621,
|
| 106382 |
+
"step": 15196
|
| 106383 |
+
},
|
| 106384 |
+
{
|
| 106385 |
+
"epoch": 17.30883190883191,
|
| 106386 |
+
"grad_norm": 0.22870177030563354,
|
| 106387 |
+
"learning_rate": 2.3873048430641783e-06,
|
| 106388 |
+
"loss": 0.5992,
|
| 106389 |
+
"step": 15197
|
| 106390 |
+
},
|
| 106391 |
+
{
|
| 106392 |
+
"epoch": 17.30997150997151,
|
| 106393 |
+
"grad_norm": 0.18157687783241272,
|
| 106394 |
+
"learning_rate": 2.3853182950649118e-06,
|
| 106395 |
+
"loss": 0.688,
|
| 106396 |
+
"step": 15198
|
| 106397 |
+
},
|
| 106398 |
+
{
|
| 106399 |
+
"epoch": 17.31111111111111,
|
| 106400 |
+
"grad_norm": 0.27913182973861694,
|
| 106401 |
+
"learning_rate": 2.383332532535729e-06,
|
| 106402 |
+
"loss": 0.4714,
|
| 106403 |
+
"step": 15199
|
| 106404 |
+
},
|
| 106405 |
+
{
|
| 106406 |
+
"epoch": 17.312250712250712,
|
| 106407 |
+
"grad_norm": 0.17969077825546265,
|
| 106408 |
+
"learning_rate": 2.381347555545604e-06,
|
| 106409 |
+
"loss": 0.7142,
|
| 106410 |
+
"step": 15200
|
| 106411 |
+
},
|
| 106412 |
+
{
|
| 106413 |
+
"epoch": 17.313390313390315,
|
| 106414 |
+
"grad_norm": 0.1802387237548828,
|
| 106415 |
+
"learning_rate": 2.379363364163484e-06,
|
| 106416 |
+
"loss": 0.6974,
|
| 106417 |
+
"step": 15201
|
| 106418 |
+
},
|
| 106419 |
+
{
|
| 106420 |
+
"epoch": 17.314529914529913,
|
| 106421 |
+
"grad_norm": 0.17316658794879913,
|
| 106422 |
+
"learning_rate": 2.3773799584582756e-06,
|
| 106423 |
+
"loss": 0.9818,
|
| 106424 |
+
"step": 15202
|
| 106425 |
+
},
|
| 106426 |
+
{
|
| 106427 |
+
"epoch": 17.315669515669516,
|
| 106428 |
+
"grad_norm": 0.15264250338077545,
|
| 106429 |
+
"learning_rate": 2.375397338498872e-06,
|
| 106430 |
+
"loss": 0.7006,
|
| 106431 |
+
"step": 15203
|
| 106432 |
+
},
|
| 106433 |
+
{
|
| 106434 |
+
"epoch": 17.316809116809118,
|
| 106435 |
+
"grad_norm": 0.16245713829994202,
|
| 106436 |
+
"learning_rate": 2.3734155043541457e-06,
|
| 106437 |
+
"loss": 0.7195,
|
| 106438 |
+
"step": 15204
|
| 106439 |
+
},
|
| 106440 |
+
{
|
| 106441 |
+
"epoch": 17.317948717948717,
|
| 106442 |
+
"grad_norm": 0.18768909573554993,
|
| 106443 |
+
"learning_rate": 2.3714344560929165e-06,
|
| 106444 |
+
"loss": 0.7107,
|
| 106445 |
+
"step": 15205
|
| 106446 |
+
},
|
| 106447 |
+
{
|
| 106448 |
+
"epoch": 17.31908831908832,
|
| 106449 |
+
"grad_norm": 0.20597070455551147,
|
| 106450 |
+
"learning_rate": 2.369454193783996e-06,
|
| 106451 |
+
"loss": 0.67,
|
| 106452 |
+
"step": 15206
|
| 106453 |
+
},
|
| 106454 |
+
{
|
| 106455 |
+
"epoch": 17.32022792022792,
|
| 106456 |
+
"grad_norm": 0.20749664306640625,
|
| 106457 |
+
"learning_rate": 2.367474717496168e-06,
|
| 106458 |
+
"loss": 0.5346,
|
| 106459 |
+
"step": 15207
|
| 106460 |
+
},
|
| 106461 |
+
{
|
| 106462 |
+
"epoch": 17.32136752136752,
|
| 106463 |
+
"grad_norm": 0.20586107671260834,
|
| 106464 |
+
"learning_rate": 2.3654960272981862e-06,
|
| 106465 |
+
"loss": 0.6622,
|
| 106466 |
+
"step": 15208
|
| 106467 |
+
},
|
| 106468 |
+
{
|
| 106469 |
+
"epoch": 17.322507122507123,
|
| 106470 |
+
"grad_norm": 0.16947408020496368,
|
| 106471 |
+
"learning_rate": 2.363518123258768e-06,
|
| 106472 |
+
"loss": 0.6493,
|
| 106473 |
+
"step": 15209
|
| 106474 |
+
},
|
| 106475 |
+
{
|
| 106476 |
+
"epoch": 17.323646723646725,
|
| 106477 |
+
"grad_norm": 0.18709897994995117,
|
| 106478 |
+
"learning_rate": 2.3615410054466156e-06,
|
| 106479 |
+
"loss": 0.7421,
|
| 106480 |
+
"step": 15210
|
| 106481 |
+
},
|
| 106482 |
+
{
|
| 106483 |
+
"epoch": 17.324786324786324,
|
| 106484 |
+
"grad_norm": 0.18042375147342682,
|
| 106485 |
+
"learning_rate": 2.3595646739304008e-06,
|
| 106486 |
+
"loss": 0.9068,
|
| 106487 |
+
"step": 15211
|
| 106488 |
+
},
|
| 106489 |
+
{
|
| 106490 |
+
"epoch": 17.325925925925926,
|
| 106491 |
+
"grad_norm": 0.3071350157260895,
|
| 106492 |
+
"learning_rate": 2.3575891287787727e-06,
|
| 106493 |
+
"loss": 0.4679,
|
| 106494 |
+
"step": 15212
|
| 106495 |
+
},
|
| 106496 |
+
{
|
| 106497 |
+
"epoch": 17.32706552706553,
|
| 106498 |
+
"grad_norm": 0.1570240706205368,
|
| 106499 |
+
"learning_rate": 2.3556143700603327e-06,
|
| 106500 |
+
"loss": 0.8679,
|
| 106501 |
+
"step": 15213
|
| 106502 |
+
},
|
| 106503 |
+
{
|
| 106504 |
+
"epoch": 17.328205128205127,
|
| 106505 |
+
"grad_norm": 0.21524712443351746,
|
| 106506 |
+
"learning_rate": 2.3536403978436763e-06,
|
| 106507 |
+
"loss": 0.7099,
|
| 106508 |
+
"step": 15214
|
| 106509 |
+
},
|
| 106510 |
+
{
|
| 106511 |
+
"epoch": 17.32934472934473,
|
| 106512 |
+
"grad_norm": 0.2164105623960495,
|
| 106513 |
+
"learning_rate": 2.35166721219737e-06,
|
| 106514 |
+
"loss": 0.7441,
|
| 106515 |
+
"step": 15215
|
| 106516 |
+
},
|
| 106517 |
+
{
|
| 106518 |
+
"epoch": 17.33048433048433,
|
| 106519 |
+
"grad_norm": 0.19767627120018005,
|
| 106520 |
+
"learning_rate": 2.3496948131899483e-06,
|
| 106521 |
+
"loss": 0.8,
|
| 106522 |
+
"step": 15216
|
| 106523 |
+
},
|
| 106524 |
+
{
|
| 106525 |
+
"epoch": 17.33162393162393,
|
| 106526 |
+
"grad_norm": 0.1692846268415451,
|
| 106527 |
+
"learning_rate": 2.3477232008899117e-06,
|
| 106528 |
+
"loss": 0.6675,
|
| 106529 |
+
"step": 15217
|
| 106530 |
+
},
|
| 106531 |
+
{
|
| 106532 |
+
"epoch": 17.332763532763533,
|
| 106533 |
+
"grad_norm": 0.19805654883384705,
|
| 106534 |
+
"learning_rate": 2.3457523753657422e-06,
|
| 106535 |
+
"loss": 0.8357,
|
| 106536 |
+
"step": 15218
|
| 106537 |
+
},
|
| 106538 |
+
{
|
| 106539 |
+
"epoch": 17.333903133903135,
|
| 106540 |
+
"grad_norm": 0.19935043156147003,
|
| 106541 |
+
"learning_rate": 2.3437823366858902e-06,
|
| 106542 |
+
"loss": 0.5921,
|
| 106543 |
+
"step": 15219
|
| 106544 |
+
},
|
| 106545 |
+
{
|
| 106546 |
+
"epoch": 17.335042735042734,
|
| 106547 |
+
"grad_norm": 0.17238126695156097,
|
| 106548 |
+
"learning_rate": 2.341813084918787e-06,
|
| 106549 |
+
"loss": 0.464,
|
| 106550 |
+
"step": 15220
|
| 106551 |
+
},
|
| 106552 |
+
{
|
| 106553 |
+
"epoch": 17.336182336182336,
|
| 106554 |
+
"grad_norm": 0.21349164843559265,
|
| 106555 |
+
"learning_rate": 2.339844620132822e-06,
|
| 106556 |
+
"loss": 0.2943,
|
| 106557 |
+
"step": 15221
|
| 106558 |
+
},
|
| 106559 |
+
{
|
| 106560 |
+
"epoch": 17.33732193732194,
|
| 106561 |
+
"grad_norm": 0.16946952044963837,
|
| 106562 |
+
"learning_rate": 2.337876942396369e-06,
|
| 106563 |
+
"loss": 0.6608,
|
| 106564 |
+
"step": 15222
|
| 106565 |
+
},
|
| 106566 |
+
{
|
| 106567 |
+
"epoch": 17.338461538461537,
|
| 106568 |
+
"grad_norm": 0.18029257655143738,
|
| 106569 |
+
"learning_rate": 2.3359100517777733e-06,
|
| 106570 |
+
"loss": 0.9875,
|
| 106571 |
+
"step": 15223
|
| 106572 |
+
},
|
| 106573 |
+
{
|
| 106574 |
+
"epoch": 17.33960113960114,
|
| 106575 |
+
"grad_norm": 0.16952665150165558,
|
| 106576 |
+
"learning_rate": 2.333943948345349e-06,
|
| 106577 |
+
"loss": 0.8551,
|
| 106578 |
+
"step": 15224
|
| 106579 |
+
},
|
| 106580 |
+
{
|
| 106581 |
+
"epoch": 17.340740740740742,
|
| 106582 |
+
"grad_norm": 0.336750328540802,
|
| 106583 |
+
"learning_rate": 2.3319786321673753e-06,
|
| 106584 |
+
"loss": 0.4064,
|
| 106585 |
+
"step": 15225
|
| 106586 |
+
},
|
| 106587 |
+
{
|
| 106588 |
+
"epoch": 17.34188034188034,
|
| 106589 |
+
"grad_norm": 0.19412781298160553,
|
| 106590 |
+
"learning_rate": 2.3300141033121254e-06,
|
| 106591 |
+
"loss": 0.6186,
|
| 106592 |
+
"step": 15226
|
| 106593 |
+
},
|
| 106594 |
+
{
|
| 106595 |
+
"epoch": 17.343019943019943,
|
| 106596 |
+
"grad_norm": 0.19354306161403656,
|
| 106597 |
+
"learning_rate": 2.3280503618478334e-06,
|
| 106598 |
+
"loss": 0.7205,
|
| 106599 |
+
"step": 15227
|
| 106600 |
+
},
|
| 106601 |
+
{
|
| 106602 |
+
"epoch": 17.344159544159545,
|
| 106603 |
+
"grad_norm": 0.21017666161060333,
|
| 106604 |
+
"learning_rate": 2.3260874078426947e-06,
|
| 106605 |
+
"loss": 0.6777,
|
| 106606 |
+
"step": 15228
|
| 106607 |
+
},
|
| 106608 |
+
{
|
| 106609 |
+
"epoch": 17.345299145299144,
|
| 106610 |
+
"grad_norm": 0.17014797031879425,
|
| 106611 |
+
"learning_rate": 2.3241252413648966e-06,
|
| 106612 |
+
"loss": 0.5468,
|
| 106613 |
+
"step": 15229
|
| 106614 |
+
},
|
| 106615 |
+
{
|
| 106616 |
+
"epoch": 17.346438746438746,
|
| 106617 |
+
"grad_norm": 0.2159450501203537,
|
| 106618 |
+
"learning_rate": 2.3221638624825847e-06,
|
| 106619 |
+
"loss": 0.4806,
|
| 106620 |
+
"step": 15230
|
| 106621 |
+
},
|
| 106622 |
+
{
|
| 106623 |
+
"epoch": 17.34757834757835,
|
| 106624 |
+
"grad_norm": 0.17926137149333954,
|
| 106625 |
+
"learning_rate": 2.320203271263893e-06,
|
| 106626 |
+
"loss": 0.7847,
|
| 106627 |
+
"step": 15231
|
| 106628 |
+
},
|
| 106629 |
+
{
|
| 106630 |
+
"epoch": 17.348717948717947,
|
| 106631 |
+
"grad_norm": 0.1916458010673523,
|
| 106632 |
+
"learning_rate": 2.3182434677769066e-06,
|
| 106633 |
+
"loss": 0.6717,
|
| 106634 |
+
"step": 15232
|
| 106635 |
+
},
|
| 106636 |
+
{
|
| 106637 |
+
"epoch": 17.34985754985755,
|
| 106638 |
+
"grad_norm": 0.19014115631580353,
|
| 106639 |
+
"learning_rate": 2.3162844520896983e-06,
|
| 106640 |
+
"loss": 0.9252,
|
| 106641 |
+
"step": 15233
|
| 106642 |
+
},
|
| 106643 |
+
{
|
| 106644 |
+
"epoch": 17.350997150997152,
|
| 106645 |
+
"grad_norm": 0.17423704266548157,
|
| 106646 |
+
"learning_rate": 2.3143262242703105e-06,
|
| 106647 |
+
"loss": 0.6787,
|
| 106648 |
+
"step": 15234
|
| 106649 |
+
},
|
| 106650 |
+
{
|
| 106651 |
+
"epoch": 17.35213675213675,
|
| 106652 |
+
"grad_norm": 0.19930385053157806,
|
| 106653 |
+
"learning_rate": 2.312368784386765e-06,
|
| 106654 |
+
"loss": 0.8409,
|
| 106655 |
+
"step": 15235
|
| 106656 |
+
},
|
| 106657 |
+
{
|
| 106658 |
+
"epoch": 17.353276353276353,
|
| 106659 |
+
"grad_norm": 0.16899840533733368,
|
| 106660 |
+
"learning_rate": 2.310412132507034e-06,
|
| 106661 |
+
"loss": 0.6438,
|
| 106662 |
+
"step": 15236
|
| 106663 |
+
},
|
| 106664 |
+
{
|
| 106665 |
+
"epoch": 17.354415954415956,
|
| 106666 |
+
"grad_norm": 0.17552229762077332,
|
| 106667 |
+
"learning_rate": 2.308456268699091e-06,
|
| 106668 |
+
"loss": 0.5095,
|
| 106669 |
+
"step": 15237
|
| 106670 |
+
},
|
| 106671 |
+
{
|
| 106672 |
+
"epoch": 17.355555555555554,
|
| 106673 |
+
"grad_norm": 0.23030370473861694,
|
| 106674 |
+
"learning_rate": 2.306501193030866e-06,
|
| 106675 |
+
"loss": 0.9334,
|
| 106676 |
+
"step": 15238
|
| 106677 |
+
},
|
| 106678 |
+
{
|
| 106679 |
+
"epoch": 17.356695156695157,
|
| 106680 |
+
"grad_norm": 0.21691684424877167,
|
| 106681 |
+
"learning_rate": 2.3045469055702593e-06,
|
| 106682 |
+
"loss": 0.5538,
|
| 106683 |
+
"step": 15239
|
| 106684 |
+
},
|
| 106685 |
+
{
|
| 106686 |
+
"epoch": 17.35783475783476,
|
| 106687 |
+
"grad_norm": 0.20816679298877716,
|
| 106688 |
+
"learning_rate": 2.3025934063851533e-06,
|
| 106689 |
+
"loss": 0.558,
|
| 106690 |
+
"step": 15240
|
| 106691 |
+
},
|
| 106692 |
+
{
|
| 106693 |
+
"epoch": 17.358974358974358,
|
| 106694 |
+
"grad_norm": 0.20304793119430542,
|
| 106695 |
+
"learning_rate": 2.3006406955433936e-06,
|
| 106696 |
+
"loss": 0.7394,
|
| 106697 |
+
"step": 15241
|
| 106698 |
+
},
|
| 106699 |
+
{
|
| 106700 |
+
"epoch": 17.36011396011396,
|
| 106701 |
+
"grad_norm": 0.1755012720823288,
|
| 106702 |
+
"learning_rate": 2.2986887731128116e-06,
|
| 106703 |
+
"loss": 0.6438,
|
| 106704 |
+
"step": 15242
|
| 106705 |
+
},
|
| 106706 |
+
{
|
| 106707 |
+
"epoch": 17.361253561253562,
|
| 106708 |
+
"grad_norm": 0.189613476395607,
|
| 106709 |
+
"learning_rate": 2.2967376391611923e-06,
|
| 106710 |
+
"loss": 0.5026,
|
| 106711 |
+
"step": 15243
|
| 106712 |
+
},
|
| 106713 |
+
{
|
| 106714 |
+
"epoch": 17.36239316239316,
|
| 106715 |
+
"grad_norm": 0.24741695821285248,
|
| 106716 |
+
"learning_rate": 2.2947872937563094e-06,
|
| 106717 |
+
"loss": 0.3569,
|
| 106718 |
+
"step": 15244
|
| 106719 |
+
},
|
| 106720 |
+
{
|
| 106721 |
+
"epoch": 17.363532763532763,
|
| 106722 |
+
"grad_norm": 0.1851482093334198,
|
| 106723 |
+
"learning_rate": 2.2928377369659026e-06,
|
| 106724 |
+
"loss": 0.7729,
|
| 106725 |
+
"step": 15245
|
| 106726 |
+
},
|
| 106727 |
+
{
|
| 106728 |
+
"epoch": 17.364672364672366,
|
| 106729 |
+
"grad_norm": 1.4638245105743408,
|
| 106730 |
+
"learning_rate": 2.2908889688576905e-06,
|
| 106731 |
+
"loss": 0.3429,
|
| 106732 |
+
"step": 15246
|
| 106733 |
+
},
|
| 106734 |
+
{
|
| 106735 |
+
"epoch": 17.365811965811965,
|
| 106736 |
+
"grad_norm": 0.16212010383605957,
|
| 106737 |
+
"learning_rate": 2.288940989499347e-06,
|
| 106738 |
+
"loss": 0.6244,
|
| 106739 |
+
"step": 15247
|
| 106740 |
+
},
|
| 106741 |
+
{
|
| 106742 |
+
"epoch": 17.366951566951567,
|
| 106743 |
+
"grad_norm": 0.21030201017856598,
|
| 106744 |
+
"learning_rate": 2.2869937989585423e-06,
|
| 106745 |
+
"loss": 0.5332,
|
| 106746 |
+
"step": 15248
|
| 106747 |
+
},
|
| 106748 |
+
{
|
| 106749 |
+
"epoch": 17.36809116809117,
|
| 106750 |
+
"grad_norm": 0.20888923108577728,
|
| 106751 |
+
"learning_rate": 2.2850473973029083e-06,
|
| 106752 |
+
"loss": 0.8135,
|
| 106753 |
+
"step": 15249
|
| 106754 |
+
},
|
| 106755 |
+
{
|
| 106756 |
+
"epoch": 17.369230769230768,
|
| 106757 |
+
"grad_norm": 0.22561891376972198,
|
| 106758 |
+
"learning_rate": 2.283101784600039e-06,
|
| 106759 |
+
"loss": 0.8256,
|
| 106760 |
+
"step": 15250
|
| 106761 |
+
},
|
| 106762 |
+
{
|
| 106763 |
+
"epoch": 17.37037037037037,
|
| 106764 |
+
"grad_norm": 0.20610755681991577,
|
| 106765 |
+
"learning_rate": 2.281156960917519e-06,
|
| 106766 |
+
"loss": 0.5839,
|
| 106767 |
+
"step": 15251
|
| 106768 |
+
},
|
| 106769 |
+
{
|
| 106770 |
+
"epoch": 17.371509971509973,
|
| 106771 |
+
"grad_norm": 0.1993379443883896,
|
| 106772 |
+
"learning_rate": 2.2792129263228934e-06,
|
| 106773 |
+
"loss": 0.5128,
|
| 106774 |
+
"step": 15252
|
| 106775 |
+
},
|
| 106776 |
+
{
|
| 106777 |
+
"epoch": 17.37264957264957,
|
| 106778 |
+
"grad_norm": 0.1732664853334427,
|
| 106779 |
+
"learning_rate": 2.2772696808836895e-06,
|
| 106780 |
+
"loss": 0.7115,
|
| 106781 |
+
"step": 15253
|
| 106782 |
+
},
|
| 106783 |
+
{
|
| 106784 |
+
"epoch": 17.373789173789174,
|
| 106785 |
+
"grad_norm": 0.23456768691539764,
|
| 106786 |
+
"learning_rate": 2.275327224667392e-06,
|
| 106787 |
+
"loss": 0.5868,
|
| 106788 |
+
"step": 15254
|
| 106789 |
+
},
|
| 106790 |
+
{
|
| 106791 |
+
"epoch": 17.374928774928776,
|
| 106792 |
+
"grad_norm": 0.18440523743629456,
|
| 106793 |
+
"learning_rate": 2.2733855577414754e-06,
|
| 106794 |
+
"loss": 0.7298,
|
| 106795 |
+
"step": 15255
|
| 106796 |
+
},
|
| 106797 |
+
{
|
| 106798 |
+
"epoch": 17.376068376068375,
|
| 106799 |
+
"grad_norm": 0.20289857685565948,
|
| 106800 |
+
"learning_rate": 2.2714446801733765e-06,
|
| 106801 |
+
"loss": 0.3849,
|
| 106802 |
+
"step": 15256
|
| 106803 |
+
},
|
| 106804 |
+
{
|
| 106805 |
+
"epoch": 17.377207977207977,
|
| 106806 |
+
"grad_norm": 0.17441540956497192,
|
| 106807 |
+
"learning_rate": 2.2695045920305052e-06,
|
| 106808 |
+
"loss": 0.6876,
|
| 106809 |
+
"step": 15257
|
| 106810 |
+
},
|
| 106811 |
+
{
|
| 106812 |
+
"epoch": 17.37834757834758,
|
| 106813 |
+
"grad_norm": 0.17561806738376617,
|
| 106814 |
+
"learning_rate": 2.267565293380253e-06,
|
| 106815 |
+
"loss": 0.4977,
|
| 106816 |
+
"step": 15258
|
| 106817 |
+
},
|
| 106818 |
+
{
|
| 106819 |
+
"epoch": 17.379487179487178,
|
| 106820 |
+
"grad_norm": 0.18843214213848114,
|
| 106821 |
+
"learning_rate": 2.2656267842899675e-06,
|
| 106822 |
+
"loss": 0.3904,
|
| 106823 |
+
"step": 15259
|
| 106824 |
+
},
|
| 106825 |
+
{
|
| 106826 |
+
"epoch": 17.38062678062678,
|
| 106827 |
+
"grad_norm": 0.20129486918449402,
|
| 106828 |
+
"learning_rate": 2.2636890648269877e-06,
|
| 106829 |
+
"loss": 0.6357,
|
| 106830 |
+
"step": 15260
|
| 106831 |
+
},
|
| 106832 |
+
{
|
| 106833 |
+
"epoch": 17.381766381766383,
|
| 106834 |
+
"grad_norm": 0.18350058794021606,
|
| 106835 |
+
"learning_rate": 2.2617521350586145e-06,
|
| 106836 |
+
"loss": 0.6551,
|
| 106837 |
+
"step": 15261
|
| 106838 |
+
},
|
| 106839 |
+
{
|
| 106840 |
+
"epoch": 17.38290598290598,
|
| 106841 |
+
"grad_norm": 0.20070157945156097,
|
| 106842 |
+
"learning_rate": 2.259815995052114e-06,
|
| 106843 |
+
"loss": 0.761,
|
| 106844 |
+
"step": 15262
|
| 106845 |
+
},
|
| 106846 |
+
{
|
| 106847 |
+
"epoch": 17.384045584045584,
|
| 106848 |
+
"grad_norm": 0.21926534175872803,
|
| 106849 |
+
"learning_rate": 2.2578806448747408e-06,
|
| 106850 |
+
"loss": 0.6202,
|
| 106851 |
+
"step": 15263
|
| 106852 |
+
},
|
| 106853 |
+
{
|
| 106854 |
+
"epoch": 17.385185185185186,
|
| 106855 |
+
"grad_norm": 0.18719224631786346,
|
| 106856 |
+
"learning_rate": 2.2559460845937156e-06,
|
| 106857 |
+
"loss": 0.794,
|
| 106858 |
+
"step": 15264
|
| 106859 |
+
},
|
| 106860 |
+
{
|
| 106861 |
+
"epoch": 17.386324786324785,
|
| 106862 |
+
"grad_norm": 0.1868806928396225,
|
| 106863 |
+
"learning_rate": 2.2540123142762326e-06,
|
| 106864 |
+
"loss": 0.5571,
|
| 106865 |
+
"step": 15265
|
| 106866 |
+
},
|
| 106867 |
+
{
|
| 106868 |
+
"epoch": 17.387464387464387,
|
| 106869 |
+
"grad_norm": 0.21154086291790009,
|
| 106870 |
+
"learning_rate": 2.2520793339894487e-06,
|
| 106871 |
+
"loss": 0.6729,
|
| 106872 |
+
"step": 15266
|
| 106873 |
+
},
|
| 106874 |
+
{
|
| 106875 |
+
"epoch": 17.38860398860399,
|
| 106876 |
+
"grad_norm": 0.1811610460281372,
|
| 106877 |
+
"learning_rate": 2.2501471438005074e-06,
|
| 106878 |
+
"loss": 0.7013,
|
| 106879 |
+
"step": 15267
|
| 106880 |
+
},
|
| 106881 |
+
{
|
| 106882 |
+
"epoch": 17.38974358974359,
|
| 106883 |
+
"grad_norm": 0.2186114490032196,
|
| 106884 |
+
"learning_rate": 2.248215743776516e-06,
|
| 106885 |
+
"loss": 0.6498,
|
| 106886 |
+
"step": 15268
|
| 106887 |
+
},
|
| 106888 |
+
{
|
| 106889 |
+
"epoch": 17.39088319088319,
|
| 106890 |
+
"grad_norm": 0.18534058332443237,
|
| 106891 |
+
"learning_rate": 2.246285133984563e-06,
|
| 106892 |
+
"loss": 0.5917,
|
| 106893 |
+
"step": 15269
|
| 106894 |
+
},
|
| 106895 |
+
{
|
| 106896 |
+
"epoch": 17.392022792022793,
|
| 106897 |
+
"grad_norm": 0.22112229466438293,
|
| 106898 |
+
"learning_rate": 2.2443553144916975e-06,
|
| 106899 |
+
"loss": 0.5508,
|
| 106900 |
+
"step": 15270
|
| 106901 |
+
},
|
| 106902 |
+
{
|
| 106903 |
+
"epoch": 17.39316239316239,
|
| 106904 |
+
"grad_norm": 0.18932108581066132,
|
| 106905 |
+
"learning_rate": 2.242426285364951e-06,
|
| 106906 |
+
"loss": 0.7679,
|
| 106907 |
+
"step": 15271
|
| 106908 |
+
},
|
| 106909 |
+
{
|
| 106910 |
+
"epoch": 17.394301994301994,
|
| 106911 |
+
"grad_norm": 0.20873083174228668,
|
| 106912 |
+
"learning_rate": 2.2404980466713273e-06,
|
| 106913 |
+
"loss": 0.6958,
|
| 106914 |
+
"step": 15272
|
| 106915 |
+
},
|
| 106916 |
+
{
|
| 106917 |
+
"epoch": 17.395441595441596,
|
| 106918 |
+
"grad_norm": 0.1730998009443283,
|
| 106919 |
+
"learning_rate": 2.2385705984777934e-06,
|
| 106920 |
+
"loss": 0.787,
|
| 106921 |
+
"step": 15273
|
| 106922 |
+
},
|
| 106923 |
+
{
|
| 106924 |
+
"epoch": 17.396581196581195,
|
| 106925 |
+
"grad_norm": 0.22182199358940125,
|
| 106926 |
+
"learning_rate": 2.2366439408512936e-06,
|
| 106927 |
+
"loss": 0.583,
|
| 106928 |
+
"step": 15274
|
| 106929 |
+
},
|
| 106930 |
+
{
|
| 106931 |
+
"epoch": 17.397720797720797,
|
| 106932 |
+
"grad_norm": 0.20717033743858337,
|
| 106933 |
+
"learning_rate": 2.2347180738587493e-06,
|
| 106934 |
+
"loss": 0.5336,
|
| 106935 |
+
"step": 15275
|
| 106936 |
+
},
|
| 106937 |
+
{
|
| 106938 |
+
"epoch": 17.3988603988604,
|
| 106939 |
+
"grad_norm": 0.1839270144701004,
|
| 106940 |
+
"learning_rate": 2.232792997567057e-06,
|
| 106941 |
+
"loss": 0.6468,
|
| 106942 |
+
"step": 15276
|
| 106943 |
+
},
|
| 106944 |
+
{
|
| 106945 |
+
"epoch": 17.4,
|
| 106946 |
+
"grad_norm": 0.19520770013332367,
|
| 106947 |
+
"learning_rate": 2.2308687120430688e-06,
|
| 106948 |
+
"loss": 0.7095,
|
| 106949 |
+
"step": 15277
|
| 106950 |
+
},
|
| 106951 |
+
{
|
| 106952 |
+
"epoch": 17.4011396011396,
|
| 106953 |
+
"grad_norm": 0.17511405050754547,
|
| 106954 |
+
"learning_rate": 2.2289452173536256e-06,
|
| 106955 |
+
"loss": 0.8228,
|
| 106956 |
+
"step": 15278
|
| 106957 |
+
},
|
| 106958 |
+
{
|
| 106959 |
+
"epoch": 17.402279202279203,
|
| 106960 |
+
"grad_norm": 0.2063295543193817,
|
| 106961 |
+
"learning_rate": 2.2270225135655354e-06,
|
| 106962 |
+
"loss": 0.6654,
|
| 106963 |
+
"step": 15279
|
| 106964 |
+
},
|
| 106965 |
+
{
|
| 106966 |
+
"epoch": 17.403418803418802,
|
| 106967 |
+
"grad_norm": 0.21008919179439545,
|
| 106968 |
+
"learning_rate": 2.2251006007455806e-06,
|
| 106969 |
+
"loss": 0.608,
|
| 106970 |
+
"step": 15280
|
| 106971 |
+
},
|
| 106972 |
+
{
|
| 106973 |
+
"epoch": 17.404558404558404,
|
| 106974 |
+
"grad_norm": 0.21195709705352783,
|
| 106975 |
+
"learning_rate": 2.223179478960513e-06,
|
| 106976 |
+
"loss": 0.736,
|
| 106977 |
+
"step": 15281
|
| 106978 |
+
},
|
| 106979 |
+
{
|
| 106980 |
+
"epoch": 17.405698005698007,
|
| 106981 |
+
"grad_norm": 0.19682569801807404,
|
| 106982 |
+
"learning_rate": 2.221259148277058e-06,
|
| 106983 |
+
"loss": 0.4586,
|
| 106984 |
+
"step": 15282
|
| 106985 |
+
},
|
| 106986 |
+
{
|
| 106987 |
+
"epoch": 17.406837606837605,
|
| 106988 |
+
"grad_norm": 0.1939670443534851,
|
| 106989 |
+
"learning_rate": 2.219339608761917e-06,
|
| 106990 |
+
"loss": 0.703,
|
| 106991 |
+
"step": 15283
|
| 106992 |
+
},
|
| 106993 |
+
{
|
| 106994 |
+
"epoch": 17.407977207977208,
|
| 106995 |
+
"grad_norm": 0.18122750520706177,
|
| 106996 |
+
"learning_rate": 2.217420860481756e-06,
|
| 106997 |
+
"loss": 0.7662,
|
| 106998 |
+
"step": 15284
|
| 106999 |
+
},
|
| 107000 |
+
{
|
| 107001 |
+
"epoch": 17.40911680911681,
|
| 107002 |
+
"grad_norm": 0.18972159922122955,
|
| 107003 |
+
"learning_rate": 2.215502903503222e-06,
|
| 107004 |
+
"loss": 0.6504,
|
| 107005 |
+
"step": 15285
|
| 107006 |
+
},
|
| 107007 |
+
{
|
| 107008 |
+
"epoch": 17.41025641025641,
|
| 107009 |
+
"grad_norm": 0.23849743604660034,
|
| 107010 |
+
"learning_rate": 2.2135857378929287e-06,
|
| 107011 |
+
"loss": 0.5109,
|
| 107012 |
+
"step": 15286
|
| 107013 |
+
},
|
| 107014 |
+
{
|
| 107015 |
+
"epoch": 17.41139601139601,
|
| 107016 |
+
"grad_norm": 0.2310958057641983,
|
| 107017 |
+
"learning_rate": 2.2116693637174694e-06,
|
| 107018 |
+
"loss": 0.8016,
|
| 107019 |
+
"step": 15287
|
| 107020 |
+
},
|
| 107021 |
+
{
|
| 107022 |
+
"epoch": 17.412535612535613,
|
| 107023 |
+
"grad_norm": 0.18404515087604523,
|
| 107024 |
+
"learning_rate": 2.2097537810433973e-06,
|
| 107025 |
+
"loss": 0.9114,
|
| 107026 |
+
"step": 15288
|
| 107027 |
+
},
|
| 107028 |
+
{
|
| 107029 |
+
"epoch": 17.413675213675212,
|
| 107030 |
+
"grad_norm": 0.2456667721271515,
|
| 107031 |
+
"learning_rate": 2.2078389899372497e-06,
|
| 107032 |
+
"loss": 0.2226,
|
| 107033 |
+
"step": 15289
|
| 107034 |
+
},
|
| 107035 |
+
{
|
| 107036 |
+
"epoch": 17.414814814814815,
|
| 107037 |
+
"grad_norm": 0.17694532871246338,
|
| 107038 |
+
"learning_rate": 2.205924990465533e-06,
|
| 107039 |
+
"loss": 0.7566,
|
| 107040 |
+
"step": 15290
|
| 107041 |
+
},
|
| 107042 |
+
{
|
| 107043 |
+
"epoch": 17.415954415954417,
|
| 107044 |
+
"grad_norm": 0.17129088938236237,
|
| 107045 |
+
"learning_rate": 2.204011782694723e-06,
|
| 107046 |
+
"loss": 0.8028,
|
| 107047 |
+
"step": 15291
|
| 107048 |
+
},
|
| 107049 |
+
{
|
| 107050 |
+
"epoch": 17.417094017094016,
|
| 107051 |
+
"grad_norm": 0.22568799555301666,
|
| 107052 |
+
"learning_rate": 2.2020993666912764e-06,
|
| 107053 |
+
"loss": 0.5809,
|
| 107054 |
+
"step": 15292
|
| 107055 |
+
},
|
| 107056 |
+
{
|
| 107057 |
+
"epoch": 17.418233618233618,
|
| 107058 |
+
"grad_norm": 0.17860203981399536,
|
| 107059 |
+
"learning_rate": 2.2001877425216117e-06,
|
| 107060 |
+
"loss": 0.636,
|
| 107061 |
+
"step": 15293
|
| 107062 |
+
},
|
| 107063 |
+
{
|
| 107064 |
+
"epoch": 17.41937321937322,
|
| 107065 |
+
"grad_norm": 0.23044751584529877,
|
| 107066 |
+
"learning_rate": 2.1982769102521254e-06,
|
| 107067 |
+
"loss": 0.5667,
|
| 107068 |
+
"step": 15294
|
| 107069 |
+
},
|
| 107070 |
+
{
|
| 107071 |
+
"epoch": 17.42051282051282,
|
| 107072 |
+
"grad_norm": 0.20839495956897736,
|
| 107073 |
+
"learning_rate": 2.196366869949193e-06,
|
| 107074 |
+
"loss": 0.6226,
|
| 107075 |
+
"step": 15295
|
| 107076 |
+
},
|
| 107077 |
+
{
|
| 107078 |
+
"epoch": 17.42165242165242,
|
| 107079 |
+
"grad_norm": 0.18788960576057434,
|
| 107080 |
+
"learning_rate": 2.194457621679144e-06,
|
| 107081 |
+
"loss": 0.7101,
|
| 107082 |
+
"step": 15296
|
| 107083 |
+
},
|
| 107084 |
+
{
|
| 107085 |
+
"epoch": 17.422792022792024,
|
| 107086 |
+
"grad_norm": 0.167411208152771,
|
| 107087 |
+
"learning_rate": 2.1925491655082982e-06,
|
| 107088 |
+
"loss": 0.6827,
|
| 107089 |
+
"step": 15297
|
| 107090 |
+
},
|
| 107091 |
+
{
|
| 107092 |
+
"epoch": 17.423931623931622,
|
| 107093 |
+
"grad_norm": 0.19627101719379425,
|
| 107094 |
+
"learning_rate": 2.190641501502941e-06,
|
| 107095 |
+
"loss": 0.5104,
|
| 107096 |
+
"step": 15298
|
| 107097 |
+
},
|
| 107098 |
+
{
|
| 107099 |
+
"epoch": 17.425071225071225,
|
| 107100 |
+
"grad_norm": 0.18951691687107086,
|
| 107101 |
+
"learning_rate": 2.188734629729333e-06,
|
| 107102 |
+
"loss": 0.5709,
|
| 107103 |
+
"step": 15299
|
| 107104 |
+
},
|
| 107105 |
+
{
|
| 107106 |
+
"epoch": 17.426210826210827,
|
| 107107 |
+
"grad_norm": 0.2600310742855072,
|
| 107108 |
+
"learning_rate": 2.1868285502537e-06,
|
| 107109 |
+
"loss": 0.4652,
|
| 107110 |
+
"step": 15300
|
| 107111 |
}
|
| 107112 |
],
|
| 107113 |
"logging_steps": 1,
|
|
|
|
| 107127 |
"attributes": {}
|
| 107128 |
}
|
| 107129 |
},
|
| 107130 |
+
"total_flos": 8.554521652823753e+19,
|
| 107131 |
"train_batch_size": 8,
|
| 107132 |
"trial_name": null,
|
| 107133 |
"trial_params": null
|