Training in progress, step 16200, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 340808816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e34874bc5d610e79a943aa178d84773a68b730b29bd1b8283a91a8c53b1970ff
|
| 3 |
size 340808816
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 173247691
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b656ff4c57cb328eab480566bbf3e4113649555f2693d5ea3c499adf2b39a470
|
| 3 |
size 173247691
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64f901e3dd85487be972809864a9546772f7d63c315505a0aa325ee6514a89c3
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 19.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -111308,6 +111308,2106 @@
|
|
| 111308 |
"learning_rate": 8.097412156181927e-08,
|
| 111309 |
"loss": 0.7563017010688782,
|
| 111310 |
"step": 15900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111311 |
}
|
| 111312 |
],
|
| 111313 |
"logging_steps": 1,
|
|
@@ -111327,7 +113427,7 @@
|
|
| 111327 |
"attributes": {}
|
| 111328 |
}
|
| 111329 |
},
|
| 111330 |
-
"total_flos": 4.
|
| 111331 |
"train_batch_size": 8,
|
| 111332 |
"trial_name": null,
|
| 111333 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 19.87730061349693,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 16200,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 111308 |
"learning_rate": 8.097412156181927e-08,
|
| 111309 |
"loss": 0.7563017010688782,
|
| 111310 |
"step": 15900
|
| 111311 |
+
},
|
| 111312 |
+
{
|
| 111313 |
+
"epoch": 19.51042944785276,
|
| 111314 |
+
"grad_norm": 0.24737033247947693,
|
| 111315 |
+
"learning_rate": 8.057098097314086e-08,
|
| 111316 |
+
"loss": 0.5320447087287903,
|
| 111317 |
+
"step": 15901
|
| 111318 |
+
},
|
| 111319 |
+
{
|
| 111320 |
+
"epoch": 19.51165644171779,
|
| 111321 |
+
"grad_norm": 0.2320805937051773,
|
| 111322 |
+
"learning_rate": 8.016884481557574e-08,
|
| 111323 |
+
"loss": 0.40771248936653137,
|
| 111324 |
+
"step": 15902
|
| 111325 |
+
},
|
| 111326 |
+
{
|
| 111327 |
+
"epoch": 19.512883435582822,
|
| 111328 |
+
"grad_norm": 0.26835694909095764,
|
| 111329 |
+
"learning_rate": 7.976771310533038e-08,
|
| 111330 |
+
"loss": 0.6366398930549622,
|
| 111331 |
+
"step": 15903
|
| 111332 |
+
},
|
| 111333 |
+
{
|
| 111334 |
+
"epoch": 19.514110429447854,
|
| 111335 |
+
"grad_norm": 0.25947368144989014,
|
| 111336 |
+
"learning_rate": 7.93675858585724e-08,
|
| 111337 |
+
"loss": 0.6098448038101196,
|
| 111338 |
+
"step": 15904
|
| 111339 |
+
},
|
| 111340 |
+
{
|
| 111341 |
+
"epoch": 19.515337423312882,
|
| 111342 |
+
"grad_norm": 0.2561061382293701,
|
| 111343 |
+
"learning_rate": 7.896846309143058e-08,
|
| 111344 |
+
"loss": 0.4435282349586487,
|
| 111345 |
+
"step": 15905
|
| 111346 |
+
},
|
| 111347 |
+
{
|
| 111348 |
+
"epoch": 19.516564417177914,
|
| 111349 |
+
"grad_norm": 0.25841858983039856,
|
| 111350 |
+
"learning_rate": 7.857034481999481e-08,
|
| 111351 |
+
"loss": 0.4411451816558838,
|
| 111352 |
+
"step": 15906
|
| 111353 |
+
},
|
| 111354 |
+
{
|
| 111355 |
+
"epoch": 19.517791411042946,
|
| 111356 |
+
"grad_norm": 0.310369610786438,
|
| 111357 |
+
"learning_rate": 7.81732310603106e-08,
|
| 111358 |
+
"loss": 0.5852314233779907,
|
| 111359 |
+
"step": 15907
|
| 111360 |
+
},
|
| 111361 |
+
{
|
| 111362 |
+
"epoch": 19.519018404907975,
|
| 111363 |
+
"grad_norm": 0.24505546689033508,
|
| 111364 |
+
"learning_rate": 7.777712182838459e-08,
|
| 111365 |
+
"loss": 0.39524149894714355,
|
| 111366 |
+
"step": 15908
|
| 111367 |
+
},
|
| 111368 |
+
{
|
| 111369 |
+
"epoch": 19.520245398773007,
|
| 111370 |
+
"grad_norm": 0.28480860590934753,
|
| 111371 |
+
"learning_rate": 7.738201714017901e-08,
|
| 111372 |
+
"loss": 0.6589886546134949,
|
| 111373 |
+
"step": 15909
|
| 111374 |
+
},
|
| 111375 |
+
{
|
| 111376 |
+
"epoch": 19.521472392638035,
|
| 111377 |
+
"grad_norm": 0.278029203414917,
|
| 111378 |
+
"learning_rate": 7.698791701162556e-08,
|
| 111379 |
+
"loss": 0.7642176151275635,
|
| 111380 |
+
"step": 15910
|
| 111381 |
+
},
|
| 111382 |
+
{
|
| 111383 |
+
"epoch": 19.522699386503067,
|
| 111384 |
+
"grad_norm": 0.23402190208435059,
|
| 111385 |
+
"learning_rate": 7.659482145860597e-08,
|
| 111386 |
+
"loss": 0.5158405900001526,
|
| 111387 |
+
"step": 15911
|
| 111388 |
+
},
|
| 111389 |
+
{
|
| 111390 |
+
"epoch": 19.5239263803681,
|
| 111391 |
+
"grad_norm": 0.24634166061878204,
|
| 111392 |
+
"learning_rate": 7.620273049696313e-08,
|
| 111393 |
+
"loss": 0.3461175858974457,
|
| 111394 |
+
"step": 15912
|
| 111395 |
+
},
|
| 111396 |
+
{
|
| 111397 |
+
"epoch": 19.525153374233128,
|
| 111398 |
+
"grad_norm": 0.24783329665660858,
|
| 111399 |
+
"learning_rate": 7.581164414250663e-08,
|
| 111400 |
+
"loss": 0.6311928629875183,
|
| 111401 |
+
"step": 15913
|
| 111402 |
+
},
|
| 111403 |
+
{
|
| 111404 |
+
"epoch": 19.52638036809816,
|
| 111405 |
+
"grad_norm": 0.30311301350593567,
|
| 111406 |
+
"learning_rate": 7.542156241099607e-08,
|
| 111407 |
+
"loss": 0.6981312036514282,
|
| 111408 |
+
"step": 15914
|
| 111409 |
+
},
|
| 111410 |
+
{
|
| 111411 |
+
"epoch": 19.52760736196319,
|
| 111412 |
+
"grad_norm": 0.2723868489265442,
|
| 111413 |
+
"learning_rate": 7.503248531815499e-08,
|
| 111414 |
+
"loss": 0.3305470049381256,
|
| 111415 |
+
"step": 15915
|
| 111416 |
+
},
|
| 111417 |
+
{
|
| 111418 |
+
"epoch": 19.52883435582822,
|
| 111419 |
+
"grad_norm": 0.2765800654888153,
|
| 111420 |
+
"learning_rate": 7.464441287966528e-08,
|
| 111421 |
+
"loss": 0.382474422454834,
|
| 111422 |
+
"step": 15916
|
| 111423 |
+
},
|
| 111424 |
+
{
|
| 111425 |
+
"epoch": 19.530061349693252,
|
| 111426 |
+
"grad_norm": 0.2898448407649994,
|
| 111427 |
+
"learning_rate": 7.425734511117e-08,
|
| 111428 |
+
"loss": 0.5361390113830566,
|
| 111429 |
+
"step": 15917
|
| 111430 |
+
},
|
| 111431 |
+
{
|
| 111432 |
+
"epoch": 19.53128834355828,
|
| 111433 |
+
"grad_norm": 0.31788763403892517,
|
| 111434 |
+
"learning_rate": 7.387128202827054e-08,
|
| 111435 |
+
"loss": 0.8060480356216431,
|
| 111436 |
+
"step": 15918
|
| 111437 |
+
},
|
| 111438 |
+
{
|
| 111439 |
+
"epoch": 19.532515337423312,
|
| 111440 |
+
"grad_norm": 0.27382099628448486,
|
| 111441 |
+
"learning_rate": 7.348622364652946e-08,
|
| 111442 |
+
"loss": 0.7617279291152954,
|
| 111443 |
+
"step": 15919
|
| 111444 |
+
},
|
| 111445 |
+
{
|
| 111446 |
+
"epoch": 19.533742331288344,
|
| 111447 |
+
"grad_norm": 0.2756589353084564,
|
| 111448 |
+
"learning_rate": 7.31021699814649e-08,
|
| 111449 |
+
"loss": 0.6167570948600769,
|
| 111450 |
+
"step": 15920
|
| 111451 |
+
},
|
| 111452 |
+
{
|
| 111453 |
+
"epoch": 19.534969325153373,
|
| 111454 |
+
"grad_norm": 0.24512624740600586,
|
| 111455 |
+
"learning_rate": 7.271912104855894e-08,
|
| 111456 |
+
"loss": 0.5281206369400024,
|
| 111457 |
+
"step": 15921
|
| 111458 |
+
},
|
| 111459 |
+
{
|
| 111460 |
+
"epoch": 19.536196319018405,
|
| 111461 |
+
"grad_norm": 0.26523831486701965,
|
| 111462 |
+
"learning_rate": 7.233707686325198e-08,
|
| 111463 |
+
"loss": 0.584851861000061,
|
| 111464 |
+
"step": 15922
|
| 111465 |
+
},
|
| 111466 |
+
{
|
| 111467 |
+
"epoch": 19.537423312883437,
|
| 111468 |
+
"grad_norm": 0.2627396285533905,
|
| 111469 |
+
"learning_rate": 7.195603744093727e-08,
|
| 111470 |
+
"loss": 0.7432947158813477,
|
| 111471 |
+
"step": 15923
|
| 111472 |
+
},
|
| 111473 |
+
{
|
| 111474 |
+
"epoch": 19.538650306748465,
|
| 111475 |
+
"grad_norm": 0.25628870725631714,
|
| 111476 |
+
"learning_rate": 7.157600279698029e-08,
|
| 111477 |
+
"loss": 0.6964184641838074,
|
| 111478 |
+
"step": 15924
|
| 111479 |
+
},
|
| 111480 |
+
{
|
| 111481 |
+
"epoch": 19.539877300613497,
|
| 111482 |
+
"grad_norm": 0.29257985949516296,
|
| 111483 |
+
"learning_rate": 7.119697294669658e-08,
|
| 111484 |
+
"loss": 0.5086300373077393,
|
| 111485 |
+
"step": 15925
|
| 111486 |
+
},
|
| 111487 |
+
{
|
| 111488 |
+
"epoch": 19.54110429447853,
|
| 111489 |
+
"grad_norm": 0.2618100345134735,
|
| 111490 |
+
"learning_rate": 7.081894790536281e-08,
|
| 111491 |
+
"loss": 0.6850935816764832,
|
| 111492 |
+
"step": 15926
|
| 111493 |
+
},
|
| 111494 |
+
{
|
| 111495 |
+
"epoch": 19.542331288343558,
|
| 111496 |
+
"grad_norm": 0.2963346838951111,
|
| 111497 |
+
"learning_rate": 7.044192768821955e-08,
|
| 111498 |
+
"loss": 0.7493679523468018,
|
| 111499 |
+
"step": 15927
|
| 111500 |
+
},
|
| 111501 |
+
{
|
| 111502 |
+
"epoch": 19.54355828220859,
|
| 111503 |
+
"grad_norm": 0.25884315371513367,
|
| 111504 |
+
"learning_rate": 7.006591231045745e-08,
|
| 111505 |
+
"loss": 0.5280992984771729,
|
| 111506 |
+
"step": 15928
|
| 111507 |
+
},
|
| 111508 |
+
{
|
| 111509 |
+
"epoch": 19.54478527607362,
|
| 111510 |
+
"grad_norm": 0.25434258580207825,
|
| 111511 |
+
"learning_rate": 6.969090178723659e-08,
|
| 111512 |
+
"loss": 0.6953238248825073,
|
| 111513 |
+
"step": 15929
|
| 111514 |
+
},
|
| 111515 |
+
{
|
| 111516 |
+
"epoch": 19.54601226993865,
|
| 111517 |
+
"grad_norm": 0.2585464119911194,
|
| 111518 |
+
"learning_rate": 6.931689613367542e-08,
|
| 111519 |
+
"loss": 0.7088115811347961,
|
| 111520 |
+
"step": 15930
|
| 111521 |
+
},
|
| 111522 |
+
{
|
| 111523 |
+
"epoch": 19.547239263803682,
|
| 111524 |
+
"grad_norm": 0.26759961247444153,
|
| 111525 |
+
"learning_rate": 6.894389536484248e-08,
|
| 111526 |
+
"loss": 0.7121865749359131,
|
| 111527 |
+
"step": 15931
|
| 111528 |
+
},
|
| 111529 |
+
{
|
| 111530 |
+
"epoch": 19.54846625766871,
|
| 111531 |
+
"grad_norm": 0.28715232014656067,
|
| 111532 |
+
"learning_rate": 6.85718994957757e-08,
|
| 111533 |
+
"loss": 0.7158739566802979,
|
| 111534 |
+
"step": 15932
|
| 111535 |
+
},
|
| 111536 |
+
{
|
| 111537 |
+
"epoch": 19.549693251533743,
|
| 111538 |
+
"grad_norm": 0.24754102528095245,
|
| 111539 |
+
"learning_rate": 6.820090854146866e-08,
|
| 111540 |
+
"loss": 0.4226120114326477,
|
| 111541 |
+
"step": 15933
|
| 111542 |
+
},
|
| 111543 |
+
{
|
| 111544 |
+
"epoch": 19.550920245398775,
|
| 111545 |
+
"grad_norm": 0.24757280945777893,
|
| 111546 |
+
"learning_rate": 6.783092251687606e-08,
|
| 111547 |
+
"loss": 0.3920643925666809,
|
| 111548 |
+
"step": 15934
|
| 111549 |
+
},
|
| 111550 |
+
{
|
| 111551 |
+
"epoch": 19.552147239263803,
|
| 111552 |
+
"grad_norm": 0.2581540048122406,
|
| 111553 |
+
"learning_rate": 6.746194143691099e-08,
|
| 111554 |
+
"loss": 0.6029343605041504,
|
| 111555 |
+
"step": 15935
|
| 111556 |
+
},
|
| 111557 |
+
{
|
| 111558 |
+
"epoch": 19.553374233128835,
|
| 111559 |
+
"grad_norm": 0.25252625346183777,
|
| 111560 |
+
"learning_rate": 6.709396531644485e-08,
|
| 111561 |
+
"loss": 0.48421990871429443,
|
| 111562 |
+
"step": 15936
|
| 111563 |
+
},
|
| 111564 |
+
{
|
| 111565 |
+
"epoch": 19.554601226993864,
|
| 111566 |
+
"grad_norm": 0.25600069761276245,
|
| 111567 |
+
"learning_rate": 6.672699417031026e-08,
|
| 111568 |
+
"loss": 0.34504538774490356,
|
| 111569 |
+
"step": 15937
|
| 111570 |
+
},
|
| 111571 |
+
{
|
| 111572 |
+
"epoch": 19.555828220858896,
|
| 111573 |
+
"grad_norm": 0.2522304654121399,
|
| 111574 |
+
"learning_rate": 6.636102801329813e-08,
|
| 111575 |
+
"loss": 0.4672437310218811,
|
| 111576 |
+
"step": 15938
|
| 111577 |
+
},
|
| 111578 |
+
{
|
| 111579 |
+
"epoch": 19.557055214723928,
|
| 111580 |
+
"grad_norm": 0.2651875913143158,
|
| 111581 |
+
"learning_rate": 6.599606686015781e-08,
|
| 111582 |
+
"loss": 0.4058421850204468,
|
| 111583 |
+
"step": 15939
|
| 111584 |
+
},
|
| 111585 |
+
{
|
| 111586 |
+
"epoch": 19.558282208588956,
|
| 111587 |
+
"grad_norm": 0.2799481749534607,
|
| 111588 |
+
"learning_rate": 6.563211072560527e-08,
|
| 111589 |
+
"loss": 0.7666460871696472,
|
| 111590 |
+
"step": 15940
|
| 111591 |
+
},
|
| 111592 |
+
{
|
| 111593 |
+
"epoch": 19.559509202453988,
|
| 111594 |
+
"grad_norm": 0.22108659148216248,
|
| 111595 |
+
"learning_rate": 6.526915962430658e-08,
|
| 111596 |
+
"loss": 0.3916415572166443,
|
| 111597 |
+
"step": 15941
|
| 111598 |
+
},
|
| 111599 |
+
{
|
| 111600 |
+
"epoch": 19.56073619631902,
|
| 111601 |
+
"grad_norm": 0.2747965157032013,
|
| 111602 |
+
"learning_rate": 6.490721357089169e-08,
|
| 111603 |
+
"loss": 0.6071722507476807,
|
| 111604 |
+
"step": 15942
|
| 111605 |
+
},
|
| 111606 |
+
{
|
| 111607 |
+
"epoch": 19.56196319018405,
|
| 111608 |
+
"grad_norm": 0.25979363918304443,
|
| 111609 |
+
"learning_rate": 6.454627257994894e-08,
|
| 111610 |
+
"loss": 0.6049418449401855,
|
| 111611 |
+
"step": 15943
|
| 111612 |
+
},
|
| 111613 |
+
{
|
| 111614 |
+
"epoch": 19.56319018404908,
|
| 111615 |
+
"grad_norm": 0.26901867985725403,
|
| 111616 |
+
"learning_rate": 6.418633666602781e-08,
|
| 111617 |
+
"loss": 0.7086642384529114,
|
| 111618 |
+
"step": 15944
|
| 111619 |
+
},
|
| 111620 |
+
{
|
| 111621 |
+
"epoch": 19.56441717791411,
|
| 111622 |
+
"grad_norm": 0.25389572978019714,
|
| 111623 |
+
"learning_rate": 6.382740584363889e-08,
|
| 111624 |
+
"loss": 0.6163915395736694,
|
| 111625 |
+
"step": 15945
|
| 111626 |
+
},
|
| 111627 |
+
{
|
| 111628 |
+
"epoch": 19.56564417177914,
|
| 111629 |
+
"grad_norm": 0.2503718137741089,
|
| 111630 |
+
"learning_rate": 6.346948012724562e-08,
|
| 111631 |
+
"loss": 0.4150804281234741,
|
| 111632 |
+
"step": 15946
|
| 111633 |
+
},
|
| 111634 |
+
{
|
| 111635 |
+
"epoch": 19.566871165644173,
|
| 111636 |
+
"grad_norm": 0.25518688559532166,
|
| 111637 |
+
"learning_rate": 6.311255953127538e-08,
|
| 111638 |
+
"loss": 0.5330639481544495,
|
| 111639 |
+
"step": 15947
|
| 111640 |
+
},
|
| 111641 |
+
{
|
| 111642 |
+
"epoch": 19.5680981595092,
|
| 111643 |
+
"grad_norm": 0.299319326877594,
|
| 111644 |
+
"learning_rate": 6.275664407011661e-08,
|
| 111645 |
+
"loss": 0.7041675448417664,
|
| 111646 |
+
"step": 15948
|
| 111647 |
+
},
|
| 111648 |
+
{
|
| 111649 |
+
"epoch": 19.569325153374233,
|
| 111650 |
+
"grad_norm": 0.30883556604385376,
|
| 111651 |
+
"learning_rate": 6.240173375811343e-08,
|
| 111652 |
+
"loss": 0.7282319068908691,
|
| 111653 |
+
"step": 15949
|
| 111654 |
+
},
|
| 111655 |
+
{
|
| 111656 |
+
"epoch": 19.570552147239265,
|
| 111657 |
+
"grad_norm": 0.25967684388160706,
|
| 111658 |
+
"learning_rate": 6.204782860957381e-08,
|
| 111659 |
+
"loss": 0.6163053512573242,
|
| 111660 |
+
"step": 15950
|
| 111661 |
+
},
|
| 111662 |
+
{
|
| 111663 |
+
"epoch": 19.571779141104294,
|
| 111664 |
+
"grad_norm": 0.27229344844818115,
|
| 111665 |
+
"learning_rate": 6.169492863875858e-08,
|
| 111666 |
+
"loss": 0.5974706411361694,
|
| 111667 |
+
"step": 15951
|
| 111668 |
+
},
|
| 111669 |
+
{
|
| 111670 |
+
"epoch": 19.573006134969326,
|
| 111671 |
+
"grad_norm": 0.27585119009017944,
|
| 111672 |
+
"learning_rate": 6.134303385989804e-08,
|
| 111673 |
+
"loss": 0.5248773097991943,
|
| 111674 |
+
"step": 15952
|
| 111675 |
+
},
|
| 111676 |
+
{
|
| 111677 |
+
"epoch": 19.574233128834354,
|
| 111678 |
+
"grad_norm": 0.26701897382736206,
|
| 111679 |
+
"learning_rate": 6.09921442871697e-08,
|
| 111680 |
+
"loss": 0.5168187618255615,
|
| 111681 |
+
"step": 15953
|
| 111682 |
+
},
|
| 111683 |
+
{
|
| 111684 |
+
"epoch": 19.575460122699386,
|
| 111685 |
+
"grad_norm": 0.27038970589637756,
|
| 111686 |
+
"learning_rate": 6.064225993472061e-08,
|
| 111687 |
+
"loss": 0.6226307153701782,
|
| 111688 |
+
"step": 15954
|
| 111689 |
+
},
|
| 111690 |
+
{
|
| 111691 |
+
"epoch": 19.57668711656442,
|
| 111692 |
+
"grad_norm": 0.24386391043663025,
|
| 111693 |
+
"learning_rate": 6.029338081665059e-08,
|
| 111694 |
+
"loss": 0.46357542276382446,
|
| 111695 |
+
"step": 15955
|
| 111696 |
+
},
|
| 111697 |
+
{
|
| 111698 |
+
"epoch": 19.577914110429447,
|
| 111699 |
+
"grad_norm": 0.2578459084033966,
|
| 111700 |
+
"learning_rate": 5.994550694702616e-08,
|
| 111701 |
+
"loss": 0.612402081489563,
|
| 111702 |
+
"step": 15956
|
| 111703 |
+
},
|
| 111704 |
+
{
|
| 111705 |
+
"epoch": 19.57914110429448,
|
| 111706 |
+
"grad_norm": 0.2820955514907837,
|
| 111707 |
+
"learning_rate": 5.9598638339866676e-08,
|
| 111708 |
+
"loss": 0.6916606426239014,
|
| 111709 |
+
"step": 15957
|
| 111710 |
+
},
|
| 111711 |
+
{
|
| 111712 |
+
"epoch": 19.58036809815951,
|
| 111713 |
+
"grad_norm": 0.23977236449718475,
|
| 111714 |
+
"learning_rate": 5.925277500915538e-08,
|
| 111715 |
+
"loss": 0.4292460083961487,
|
| 111716 |
+
"step": 15958
|
| 111717 |
+
},
|
| 111718 |
+
{
|
| 111719 |
+
"epoch": 19.58159509202454,
|
| 111720 |
+
"grad_norm": 0.2951478064060211,
|
| 111721 |
+
"learning_rate": 5.890791696882836e-08,
|
| 111722 |
+
"loss": 0.7307682037353516,
|
| 111723 |
+
"step": 15959
|
| 111724 |
+
},
|
| 111725 |
+
{
|
| 111726 |
+
"epoch": 19.58282208588957,
|
| 111727 |
+
"grad_norm": 0.27148646116256714,
|
| 111728 |
+
"learning_rate": 5.856406423279115e-08,
|
| 111729 |
+
"loss": 0.6923516988754272,
|
| 111730 |
+
"step": 15960
|
| 111731 |
+
},
|
| 111732 |
+
{
|
| 111733 |
+
"epoch": 19.5840490797546,
|
| 111734 |
+
"grad_norm": 0.2864905893802643,
|
| 111735 |
+
"learning_rate": 5.8221216814902114e-08,
|
| 111736 |
+
"loss": 0.5641052722930908,
|
| 111737 |
+
"step": 15961
|
| 111738 |
+
},
|
| 111739 |
+
{
|
| 111740 |
+
"epoch": 19.58527607361963,
|
| 111741 |
+
"grad_norm": 0.2598612904548645,
|
| 111742 |
+
"learning_rate": 5.7879374728977974e-08,
|
| 111743 |
+
"loss": 0.6117309331893921,
|
| 111744 |
+
"step": 15962
|
| 111745 |
+
},
|
| 111746 |
+
{
|
| 111747 |
+
"epoch": 19.586503067484664,
|
| 111748 |
+
"grad_norm": 0.23265589773654938,
|
| 111749 |
+
"learning_rate": 5.753853798879938e-08,
|
| 111750 |
+
"loss": 0.3815501928329468,
|
| 111751 |
+
"step": 15963
|
| 111752 |
+
},
|
| 111753 |
+
{
|
| 111754 |
+
"epoch": 19.587730061349692,
|
| 111755 |
+
"grad_norm": 0.253229558467865,
|
| 111756 |
+
"learning_rate": 5.7198706608105335e-08,
|
| 111757 |
+
"loss": 0.6089166402816772,
|
| 111758 |
+
"step": 15964
|
| 111759 |
+
},
|
| 111760 |
+
{
|
| 111761 |
+
"epoch": 19.588957055214724,
|
| 111762 |
+
"grad_norm": 0.3138175308704376,
|
| 111763 |
+
"learning_rate": 5.685988060059044e-08,
|
| 111764 |
+
"loss": 0.630569577217102,
|
| 111765 |
+
"step": 15965
|
| 111766 |
+
},
|
| 111767 |
+
{
|
| 111768 |
+
"epoch": 19.590184049079756,
|
| 111769 |
+
"grad_norm": 0.2473113089799881,
|
| 111770 |
+
"learning_rate": 5.6522059979915996e-08,
|
| 111771 |
+
"loss": 0.5150618553161621,
|
| 111772 |
+
"step": 15966
|
| 111773 |
+
},
|
| 111774 |
+
{
|
| 111775 |
+
"epoch": 19.591411042944785,
|
| 111776 |
+
"grad_norm": 0.2767941653728485,
|
| 111777 |
+
"learning_rate": 5.618524475969334e-08,
|
| 111778 |
+
"loss": 0.5117374062538147,
|
| 111779 |
+
"step": 15967
|
| 111780 |
+
},
|
| 111781 |
+
{
|
| 111782 |
+
"epoch": 19.592638036809817,
|
| 111783 |
+
"grad_norm": 0.25861138105392456,
|
| 111784 |
+
"learning_rate": 5.5849434953503254e-08,
|
| 111785 |
+
"loss": 0.6653376817703247,
|
| 111786 |
+
"step": 15968
|
| 111787 |
+
},
|
| 111788 |
+
{
|
| 111789 |
+
"epoch": 19.593865030674845,
|
| 111790 |
+
"grad_norm": 0.2616503834724426,
|
| 111791 |
+
"learning_rate": 5.551463057487938e-08,
|
| 111792 |
+
"loss": 0.5457661151885986,
|
| 111793 |
+
"step": 15969
|
| 111794 |
+
},
|
| 111795 |
+
{
|
| 111796 |
+
"epoch": 19.595092024539877,
|
| 111797 |
+
"grad_norm": 0.25805747509002686,
|
| 111798 |
+
"learning_rate": 5.518083163731647e-08,
|
| 111799 |
+
"loss": 0.6343573331832886,
|
| 111800 |
+
"step": 15970
|
| 111801 |
+
},
|
| 111802 |
+
{
|
| 111803 |
+
"epoch": 19.59631901840491,
|
| 111804 |
+
"grad_norm": 0.24960413575172424,
|
| 111805 |
+
"learning_rate": 5.484803815427042e-08,
|
| 111806 |
+
"loss": 0.6431906223297119,
|
| 111807 |
+
"step": 15971
|
| 111808 |
+
},
|
| 111809 |
+
{
|
| 111810 |
+
"epoch": 19.597546012269937,
|
| 111811 |
+
"grad_norm": 0.2577416002750397,
|
| 111812 |
+
"learning_rate": 5.4516250139152733e-08,
|
| 111813 |
+
"loss": 0.6427536606788635,
|
| 111814 |
+
"step": 15972
|
| 111815 |
+
},
|
| 111816 |
+
{
|
| 111817 |
+
"epoch": 19.59877300613497,
|
| 111818 |
+
"grad_norm": 0.2893083393573761,
|
| 111819 |
+
"learning_rate": 5.4185467605338824e-08,
|
| 111820 |
+
"loss": 0.5637693405151367,
|
| 111821 |
+
"step": 15973
|
| 111822 |
+
},
|
| 111823 |
+
{
|
| 111824 |
+
"epoch": 19.6,
|
| 111825 |
+
"grad_norm": 0.2627011835575104,
|
| 111826 |
+
"learning_rate": 5.385569056616246e-08,
|
| 111827 |
+
"loss": 0.2358238697052002,
|
| 111828 |
+
"step": 15974
|
| 111829 |
+
},
|
| 111830 |
+
{
|
| 111831 |
+
"epoch": 19.60122699386503,
|
| 111832 |
+
"grad_norm": 0.2792356312274933,
|
| 111833 |
+
"learning_rate": 5.352691903491303e-08,
|
| 111834 |
+
"loss": 0.5992540717124939,
|
| 111835 |
+
"step": 15975
|
| 111836 |
+
},
|
| 111837 |
+
{
|
| 111838 |
+
"epoch": 19.602453987730062,
|
| 111839 |
+
"grad_norm": 0.2684832811355591,
|
| 111840 |
+
"learning_rate": 5.31991530248438e-08,
|
| 111841 |
+
"loss": 0.7728331685066223,
|
| 111842 |
+
"step": 15976
|
| 111843 |
+
},
|
| 111844 |
+
{
|
| 111845 |
+
"epoch": 19.60368098159509,
|
| 111846 |
+
"grad_norm": 0.2679431140422821,
|
| 111847 |
+
"learning_rate": 5.2872392549166424e-08,
|
| 111848 |
+
"loss": 0.5410866737365723,
|
| 111849 |
+
"step": 15977
|
| 111850 |
+
},
|
| 111851 |
+
{
|
| 111852 |
+
"epoch": 19.604907975460122,
|
| 111853 |
+
"grad_norm": 0.2507835626602173,
|
| 111854 |
+
"learning_rate": 5.2546637621050943e-08,
|
| 111855 |
+
"loss": 0.5245778560638428,
|
| 111856 |
+
"step": 15978
|
| 111857 |
+
},
|
| 111858 |
+
{
|
| 111859 |
+
"epoch": 19.606134969325154,
|
| 111860 |
+
"grad_norm": 0.2643088698387146,
|
| 111861 |
+
"learning_rate": 5.2221888253628505e-08,
|
| 111862 |
+
"loss": 0.6431630253791809,
|
| 111863 |
+
"step": 15979
|
| 111864 |
+
},
|
| 111865 |
+
{
|
| 111866 |
+
"epoch": 19.607361963190183,
|
| 111867 |
+
"grad_norm": 0.2940313518047333,
|
| 111868 |
+
"learning_rate": 5.189814445998864e-08,
|
| 111869 |
+
"loss": 0.6931966543197632,
|
| 111870 |
+
"step": 15980
|
| 111871 |
+
},
|
| 111872 |
+
{
|
| 111873 |
+
"epoch": 19.608588957055215,
|
| 111874 |
+
"grad_norm": 0.26726362109184265,
|
| 111875 |
+
"learning_rate": 5.1575406253182026e-08,
|
| 111876 |
+
"loss": 0.6651864051818848,
|
| 111877 |
+
"step": 15981
|
| 111878 |
+
},
|
| 111879 |
+
{
|
| 111880 |
+
"epoch": 19.609815950920247,
|
| 111881 |
+
"grad_norm": 0.29355674982070923,
|
| 111882 |
+
"learning_rate": 5.125367364621492e-08,
|
| 111883 |
+
"loss": 0.5984067916870117,
|
| 111884 |
+
"step": 15982
|
| 111885 |
+
},
|
| 111886 |
+
{
|
| 111887 |
+
"epoch": 19.611042944785275,
|
| 111888 |
+
"grad_norm": 0.2607671320438385,
|
| 111889 |
+
"learning_rate": 5.0932946652054724e-08,
|
| 111890 |
+
"loss": 0.5616246461868286,
|
| 111891 |
+
"step": 15983
|
| 111892 |
+
},
|
| 111893 |
+
{
|
| 111894 |
+
"epoch": 19.612269938650307,
|
| 111895 |
+
"grad_norm": 0.30474963784217834,
|
| 111896 |
+
"learning_rate": 5.0613225283629995e-08,
|
| 111897 |
+
"loss": 0.726374626159668,
|
| 111898 |
+
"step": 15984
|
| 111899 |
+
},
|
| 111900 |
+
{
|
| 111901 |
+
"epoch": 19.61349693251534,
|
| 111902 |
+
"grad_norm": 0.25811973214149475,
|
| 111903 |
+
"learning_rate": 5.0294509553830415e-08,
|
| 111904 |
+
"loss": 0.6424156427383423,
|
| 111905 |
+
"step": 15985
|
| 111906 |
+
},
|
| 111907 |
+
{
|
| 111908 |
+
"epoch": 19.614723926380368,
|
| 111909 |
+
"grad_norm": 0.2733149528503418,
|
| 111910 |
+
"learning_rate": 4.997679947549849e-08,
|
| 111911 |
+
"loss": 0.5350072383880615,
|
| 111912 |
+
"step": 15986
|
| 111913 |
+
},
|
| 111914 |
+
{
|
| 111915 |
+
"epoch": 19.6159509202454,
|
| 111916 |
+
"grad_norm": 0.23136980831623077,
|
| 111917 |
+
"learning_rate": 4.966009506144342e-08,
|
| 111918 |
+
"loss": 0.3736768960952759,
|
| 111919 |
+
"step": 15987
|
| 111920 |
+
},
|
| 111921 |
+
{
|
| 111922 |
+
"epoch": 19.617177914110428,
|
| 111923 |
+
"grad_norm": 0.27484381198883057,
|
| 111924 |
+
"learning_rate": 4.934439632443e-08,
|
| 111925 |
+
"loss": 0.6022725105285645,
|
| 111926 |
+
"step": 15988
|
| 111927 |
+
},
|
| 111928 |
+
{
|
| 111929 |
+
"epoch": 19.61840490797546,
|
| 111930 |
+
"grad_norm": 0.23771561682224274,
|
| 111931 |
+
"learning_rate": 4.902970327718137e-08,
|
| 111932 |
+
"loss": 0.4629988670349121,
|
| 111933 |
+
"step": 15989
|
| 111934 |
+
},
|
| 111935 |
+
{
|
| 111936 |
+
"epoch": 19.619631901840492,
|
| 111937 |
+
"grad_norm": 0.27798375487327576,
|
| 111938 |
+
"learning_rate": 4.871601593238184e-08,
|
| 111939 |
+
"loss": 0.6429271697998047,
|
| 111940 |
+
"step": 15990
|
| 111941 |
+
},
|
| 111942 |
+
{
|
| 111943 |
+
"epoch": 19.62085889570552,
|
| 111944 |
+
"grad_norm": 0.28687602281570435,
|
| 111945 |
+
"learning_rate": 4.840333430267685e-08,
|
| 111946 |
+
"loss": 0.6762521266937256,
|
| 111947 |
+
"step": 15991
|
| 111948 |
+
},
|
| 111949 |
+
{
|
| 111950 |
+
"epoch": 19.622085889570553,
|
| 111951 |
+
"grad_norm": 0.2383459359407425,
|
| 111952 |
+
"learning_rate": 4.8091658400670203e-08,
|
| 111953 |
+
"loss": 0.5714373588562012,
|
| 111954 |
+
"step": 15992
|
| 111955 |
+
},
|
| 111956 |
+
{
|
| 111957 |
+
"epoch": 19.62331288343558,
|
| 111958 |
+
"grad_norm": 0.2682969570159912,
|
| 111959 |
+
"learning_rate": 4.778098823892407e-08,
|
| 111960 |
+
"loss": 0.5539230108261108,
|
| 111961 |
+
"step": 15993
|
| 111962 |
+
},
|
| 111963 |
+
{
|
| 111964 |
+
"epoch": 19.624539877300613,
|
| 111965 |
+
"grad_norm": 0.1876458376646042,
|
| 111966 |
+
"learning_rate": 4.7471323829959e-08,
|
| 111967 |
+
"loss": 0.2790374159812927,
|
| 111968 |
+
"step": 15994
|
| 111969 |
+
},
|
| 111970 |
+
{
|
| 111971 |
+
"epoch": 19.625766871165645,
|
| 111972 |
+
"grad_norm": 0.30028119683265686,
|
| 111973 |
+
"learning_rate": 4.716266518625945e-08,
|
| 111974 |
+
"loss": 0.7559159398078918,
|
| 111975 |
+
"step": 15995
|
| 111976 |
+
},
|
| 111977 |
+
{
|
| 111978 |
+
"epoch": 19.626993865030673,
|
| 111979 |
+
"grad_norm": 0.2864221930503845,
|
| 111980 |
+
"learning_rate": 4.6855012320265456e-08,
|
| 111981 |
+
"loss": 0.6386426687240601,
|
| 111982 |
+
"step": 15996
|
| 111983 |
+
},
|
| 111984 |
+
{
|
| 111985 |
+
"epoch": 19.628220858895705,
|
| 111986 |
+
"grad_norm": 0.2376590073108673,
|
| 111987 |
+
"learning_rate": 4.6548365244375446e-08,
|
| 111988 |
+
"loss": 0.40150660276412964,
|
| 111989 |
+
"step": 15997
|
| 111990 |
+
},
|
| 111991 |
+
{
|
| 111992 |
+
"epoch": 19.629447852760737,
|
| 111993 |
+
"grad_norm": 0.2645387649536133,
|
| 111994 |
+
"learning_rate": 4.624272397095175e-08,
|
| 111995 |
+
"loss": 0.37160634994506836,
|
| 111996 |
+
"step": 15998
|
| 111997 |
+
},
|
| 111998 |
+
{
|
| 111999 |
+
"epoch": 19.630674846625766,
|
| 112000 |
+
"grad_norm": 0.2877919375896454,
|
| 112001 |
+
"learning_rate": 4.593808851231507e-08,
|
| 112002 |
+
"loss": 0.6853649616241455,
|
| 112003 |
+
"step": 15999
|
| 112004 |
+
},
|
| 112005 |
+
{
|
| 112006 |
+
"epoch": 19.631901840490798,
|
| 112007 |
+
"grad_norm": 0.30210092663764954,
|
| 112008 |
+
"learning_rate": 4.563445888074169e-08,
|
| 112009 |
+
"loss": 0.7398550510406494,
|
| 112010 |
+
"step": 16000
|
| 112011 |
+
},
|
| 112012 |
+
{
|
| 112013 |
+
"epoch": 19.63312883435583,
|
| 112014 |
+
"grad_norm": 0.2839699983596802,
|
| 112015 |
+
"learning_rate": 4.533183508847183e-08,
|
| 112016 |
+
"loss": 0.6766139268875122,
|
| 112017 |
+
"step": 16001
|
| 112018 |
+
},
|
| 112019 |
+
{
|
| 112020 |
+
"epoch": 19.63435582822086,
|
| 112021 |
+
"grad_norm": 0.26487016677856445,
|
| 112022 |
+
"learning_rate": 4.5030217147701284e-08,
|
| 112023 |
+
"loss": 0.6392691135406494,
|
| 112024 |
+
"step": 16002
|
| 112025 |
+
},
|
| 112026 |
+
{
|
| 112027 |
+
"epoch": 19.63558282208589,
|
| 112028 |
+
"grad_norm": 0.2603174149990082,
|
| 112029 |
+
"learning_rate": 4.472960507058976e-08,
|
| 112030 |
+
"loss": 0.4220722019672394,
|
| 112031 |
+
"step": 16003
|
| 112032 |
+
},
|
| 112033 |
+
{
|
| 112034 |
+
"epoch": 19.63680981595092,
|
| 112035 |
+
"grad_norm": 0.28972843289375305,
|
| 112036 |
+
"learning_rate": 4.442999886925536e-08,
|
| 112037 |
+
"loss": 0.7597860097885132,
|
| 112038 |
+
"step": 16004
|
| 112039 |
+
},
|
| 112040 |
+
{
|
| 112041 |
+
"epoch": 19.63803680981595,
|
| 112042 |
+
"grad_norm": 0.27056142687797546,
|
| 112043 |
+
"learning_rate": 4.4131398555768976e-08,
|
| 112044 |
+
"loss": 0.7562472820281982,
|
| 112045 |
+
"step": 16005
|
| 112046 |
+
},
|
| 112047 |
+
{
|
| 112048 |
+
"epoch": 19.639263803680983,
|
| 112049 |
+
"grad_norm": 0.2781376838684082,
|
| 112050 |
+
"learning_rate": 4.383380414217098e-08,
|
| 112051 |
+
"loss": 0.5332688093185425,
|
| 112052 |
+
"step": 16006
|
| 112053 |
+
},
|
| 112054 |
+
{
|
| 112055 |
+
"epoch": 19.64049079754601,
|
| 112056 |
+
"grad_norm": 0.265010267496109,
|
| 112057 |
+
"learning_rate": 4.353721564045454e-08,
|
| 112058 |
+
"loss": 0.5570353865623474,
|
| 112059 |
+
"step": 16007
|
| 112060 |
+
},
|
| 112061 |
+
{
|
| 112062 |
+
"epoch": 19.641717791411043,
|
| 112063 |
+
"grad_norm": 0.2824796736240387,
|
| 112064 |
+
"learning_rate": 4.3241633062574006e-08,
|
| 112065 |
+
"loss": 0.6140771508216858,
|
| 112066 |
+
"step": 16008
|
| 112067 |
+
},
|
| 112068 |
+
{
|
| 112069 |
+
"epoch": 19.642944785276075,
|
| 112070 |
+
"grad_norm": 0.23544342815876007,
|
| 112071 |
+
"learning_rate": 4.2947056420447606e-08,
|
| 112072 |
+
"loss": 0.3051731288433075,
|
| 112073 |
+
"step": 16009
|
| 112074 |
+
},
|
| 112075 |
+
{
|
| 112076 |
+
"epoch": 19.644171779141104,
|
| 112077 |
+
"grad_norm": 0.2712775468826294,
|
| 112078 |
+
"learning_rate": 4.265348572594363e-08,
|
| 112079 |
+
"loss": 0.7392816543579102,
|
| 112080 |
+
"step": 16010
|
| 112081 |
+
},
|
| 112082 |
+
{
|
| 112083 |
+
"epoch": 19.645398773006136,
|
| 112084 |
+
"grad_norm": 0.2700348496437073,
|
| 112085 |
+
"learning_rate": 4.236092099089706e-08,
|
| 112086 |
+
"loss": 0.507668137550354,
|
| 112087 |
+
"step": 16011
|
| 112088 |
+
},
|
| 112089 |
+
{
|
| 112090 |
+
"epoch": 19.646625766871164,
|
| 112091 |
+
"grad_norm": 0.25249290466308594,
|
| 112092 |
+
"learning_rate": 4.2069362227098454e-08,
|
| 112093 |
+
"loss": 0.5745823383331299,
|
| 112094 |
+
"step": 16012
|
| 112095 |
+
},
|
| 112096 |
+
{
|
| 112097 |
+
"epoch": 19.647852760736196,
|
| 112098 |
+
"grad_norm": 0.2548300623893738,
|
| 112099 |
+
"learning_rate": 4.1778809446302304e-08,
|
| 112100 |
+
"loss": 0.665492057800293,
|
| 112101 |
+
"step": 16013
|
| 112102 |
+
},
|
| 112103 |
+
{
|
| 112104 |
+
"epoch": 19.649079754601228,
|
| 112105 |
+
"grad_norm": 0.2931409478187561,
|
| 112106 |
+
"learning_rate": 4.1489262660221465e-08,
|
| 112107 |
+
"loss": 0.6510673761367798,
|
| 112108 |
+
"step": 16014
|
| 112109 |
+
},
|
| 112110 |
+
{
|
| 112111 |
+
"epoch": 19.650306748466257,
|
| 112112 |
+
"grad_norm": 0.2476225644350052,
|
| 112113 |
+
"learning_rate": 4.120072188052437e-08,
|
| 112114 |
+
"loss": 0.5178371071815491,
|
| 112115 |
+
"step": 16015
|
| 112116 |
+
},
|
| 112117 |
+
{
|
| 112118 |
+
"epoch": 19.65153374233129,
|
| 112119 |
+
"grad_norm": 0.25723797082901,
|
| 112120 |
+
"learning_rate": 4.091318711884062e-08,
|
| 112121 |
+
"loss": 0.41661933064460754,
|
| 112122 |
+
"step": 16016
|
| 112123 |
+
},
|
| 112124 |
+
{
|
| 112125 |
+
"epoch": 19.65276073619632,
|
| 112126 |
+
"grad_norm": 0.255218505859375,
|
| 112127 |
+
"learning_rate": 4.062665838676094e-08,
|
| 112128 |
+
"loss": 0.6123206615447998,
|
| 112129 |
+
"step": 16017
|
| 112130 |
+
},
|
| 112131 |
+
{
|
| 112132 |
+
"epoch": 19.65398773006135,
|
| 112133 |
+
"grad_norm": 0.25502490997314453,
|
| 112134 |
+
"learning_rate": 4.034113569583442e-08,
|
| 112135 |
+
"loss": 0.5211910009384155,
|
| 112136 |
+
"step": 16018
|
| 112137 |
+
},
|
| 112138 |
+
{
|
| 112139 |
+
"epoch": 19.65521472392638,
|
| 112140 |
+
"grad_norm": 0.2687593698501587,
|
| 112141 |
+
"learning_rate": 4.005661905756852e-08,
|
| 112142 |
+
"loss": 0.6342282295227051,
|
| 112143 |
+
"step": 16019
|
| 112144 |
+
},
|
| 112145 |
+
{
|
| 112146 |
+
"epoch": 19.65644171779141,
|
| 112147 |
+
"grad_norm": 0.2593687176704407,
|
| 112148 |
+
"learning_rate": 3.9773108483431855e-08,
|
| 112149 |
+
"loss": 0.6300607323646545,
|
| 112150 |
+
"step": 16020
|
| 112151 |
+
},
|
| 112152 |
+
{
|
| 112153 |
+
"epoch": 19.65766871165644,
|
| 112154 |
+
"grad_norm": 0.2779507339000702,
|
| 112155 |
+
"learning_rate": 3.9490603984854156e-08,
|
| 112156 |
+
"loss": 0.5340242981910706,
|
| 112157 |
+
"step": 16021
|
| 112158 |
+
},
|
| 112159 |
+
{
|
| 112160 |
+
"epoch": 19.658895705521473,
|
| 112161 |
+
"grad_norm": 0.2657099962234497,
|
| 112162 |
+
"learning_rate": 3.920910557322077e-08,
|
| 112163 |
+
"loss": 0.5706982612609863,
|
| 112164 |
+
"step": 16022
|
| 112165 |
+
},
|
| 112166 |
+
{
|
| 112167 |
+
"epoch": 19.660122699386502,
|
| 112168 |
+
"grad_norm": 0.2637782394886017,
|
| 112169 |
+
"learning_rate": 3.89286132598754e-08,
|
| 112170 |
+
"loss": 0.5678005218505859,
|
| 112171 |
+
"step": 16023
|
| 112172 |
+
},
|
| 112173 |
+
{
|
| 112174 |
+
"epoch": 19.661349693251534,
|
| 112175 |
+
"grad_norm": 0.27354493737220764,
|
| 112176 |
+
"learning_rate": 3.864912705612844e-08,
|
| 112177 |
+
"loss": 0.6449894905090332,
|
| 112178 |
+
"step": 16024
|
| 112179 |
+
},
|
| 112180 |
+
{
|
| 112181 |
+
"epoch": 19.662576687116566,
|
| 112182 |
+
"grad_norm": 0.2575005888938904,
|
| 112183 |
+
"learning_rate": 3.8370646973243106e-08,
|
| 112184 |
+
"loss": 0.5875341892242432,
|
| 112185 |
+
"step": 16025
|
| 112186 |
+
},
|
| 112187 |
+
{
|
| 112188 |
+
"epoch": 19.663803680981594,
|
| 112189 |
+
"grad_norm": 0.26397693157196045,
|
| 112190 |
+
"learning_rate": 3.8093173022443754e-08,
|
| 112191 |
+
"loss": 0.7614321708679199,
|
| 112192 |
+
"step": 16026
|
| 112193 |
+
},
|
| 112194 |
+
{
|
| 112195 |
+
"epoch": 19.665030674846626,
|
| 112196 |
+
"grad_norm": 0.2759767472743988,
|
| 112197 |
+
"learning_rate": 3.781670521491587e-08,
|
| 112198 |
+
"loss": 0.5747803449630737,
|
| 112199 |
+
"step": 16027
|
| 112200 |
+
},
|
| 112201 |
+
{
|
| 112202 |
+
"epoch": 19.666257668711655,
|
| 112203 |
+
"grad_norm": 0.25943878293037415,
|
| 112204 |
+
"learning_rate": 3.754124356180055e-08,
|
| 112205 |
+
"loss": 0.49038490653038025,
|
| 112206 |
+
"step": 16028
|
| 112207 |
+
},
|
| 112208 |
+
{
|
| 112209 |
+
"epoch": 19.667484662576687,
|
| 112210 |
+
"grad_norm": 0.271399587392807,
|
| 112211 |
+
"learning_rate": 3.726678807420558e-08,
|
| 112212 |
+
"loss": 0.6361850500106812,
|
| 112213 |
+
"step": 16029
|
| 112214 |
+
},
|
| 112215 |
+
{
|
| 112216 |
+
"epoch": 19.66871165644172,
|
| 112217 |
+
"grad_norm": 0.2906273305416107,
|
| 112218 |
+
"learning_rate": 3.6993338763186004e-08,
|
| 112219 |
+
"loss": 0.6132738590240479,
|
| 112220 |
+
"step": 16030
|
| 112221 |
+
},
|
| 112222 |
+
{
|
| 112223 |
+
"epoch": 19.669938650306747,
|
| 112224 |
+
"grad_norm": 0.22629183530807495,
|
| 112225 |
+
"learning_rate": 3.672089563977188e-08,
|
| 112226 |
+
"loss": 0.47655367851257324,
|
| 112227 |
+
"step": 16031
|
| 112228 |
+
},
|
| 112229 |
+
{
|
| 112230 |
+
"epoch": 19.67116564417178,
|
| 112231 |
+
"grad_norm": 0.28109127283096313,
|
| 112232 |
+
"learning_rate": 3.6449458714940546e-08,
|
| 112233 |
+
"loss": 0.6552042961120605,
|
| 112234 |
+
"step": 16032
|
| 112235 |
+
},
|
| 112236 |
+
{
|
| 112237 |
+
"epoch": 19.67239263803681,
|
| 112238 |
+
"grad_norm": 0.2612967789173126,
|
| 112239 |
+
"learning_rate": 3.617902799963047e-08,
|
| 112240 |
+
"loss": 0.5518936514854431,
|
| 112241 |
+
"step": 16033
|
| 112242 |
+
},
|
| 112243 |
+
{
|
| 112244 |
+
"epoch": 19.67361963190184,
|
| 112245 |
+
"grad_norm": 0.24125449359416962,
|
| 112246 |
+
"learning_rate": 3.590960350474681e-08,
|
| 112247 |
+
"loss": 0.37665876746177673,
|
| 112248 |
+
"step": 16034
|
| 112249 |
+
},
|
| 112250 |
+
{
|
| 112251 |
+
"epoch": 19.67484662576687,
|
| 112252 |
+
"grad_norm": 0.29064464569091797,
|
| 112253 |
+
"learning_rate": 3.564118524114757e-08,
|
| 112254 |
+
"loss": 0.8693332672119141,
|
| 112255 |
+
"step": 16035
|
| 112256 |
+
},
|
| 112257 |
+
{
|
| 112258 |
+
"epoch": 19.6760736196319,
|
| 112259 |
+
"grad_norm": 0.2659071981906891,
|
| 112260 |
+
"learning_rate": 3.537377321965185e-08,
|
| 112261 |
+
"loss": 0.5995357036590576,
|
| 112262 |
+
"step": 16036
|
| 112263 |
+
},
|
| 112264 |
+
{
|
| 112265 |
+
"epoch": 19.677300613496932,
|
| 112266 |
+
"grad_norm": 0.3252749741077423,
|
| 112267 |
+
"learning_rate": 3.5107367451037156e-08,
|
| 112268 |
+
"loss": 0.7127843499183655,
|
| 112269 |
+
"step": 16037
|
| 112270 |
+
},
|
| 112271 |
+
{
|
| 112272 |
+
"epoch": 19.678527607361964,
|
| 112273 |
+
"grad_norm": 0.2711711525917053,
|
| 112274 |
+
"learning_rate": 3.484196794604211e-08,
|
| 112275 |
+
"loss": 0.4827805161476135,
|
| 112276 |
+
"step": 16038
|
| 112277 |
+
},
|
| 112278 |
+
{
|
| 112279 |
+
"epoch": 19.679754601226993,
|
| 112280 |
+
"grad_norm": 0.26385441422462463,
|
| 112281 |
+
"learning_rate": 3.457757471536649e-08,
|
| 112282 |
+
"loss": 0.6378778219223022,
|
| 112283 |
+
"step": 16039
|
| 112284 |
+
},
|
| 112285 |
+
{
|
| 112286 |
+
"epoch": 19.680981595092025,
|
| 112287 |
+
"grad_norm": 0.2938792407512665,
|
| 112288 |
+
"learning_rate": 3.431418776966289e-08,
|
| 112289 |
+
"loss": 0.6333194971084595,
|
| 112290 |
+
"step": 16040
|
| 112291 |
+
},
|
| 112292 |
+
{
|
| 112293 |
+
"epoch": 19.682208588957057,
|
| 112294 |
+
"grad_norm": 0.27745676040649414,
|
| 112295 |
+
"learning_rate": 3.405180711955058e-08,
|
| 112296 |
+
"loss": 0.6715232133865356,
|
| 112297 |
+
"step": 16041
|
| 112298 |
+
},
|
| 112299 |
+
{
|
| 112300 |
+
"epoch": 19.683435582822085,
|
| 112301 |
+
"grad_norm": 0.25622454285621643,
|
| 112302 |
+
"learning_rate": 3.379043277560445e-08,
|
| 112303 |
+
"loss": 0.6189010143280029,
|
| 112304 |
+
"step": 16042
|
| 112305 |
+
},
|
| 112306 |
+
{
|
| 112307 |
+
"epoch": 19.684662576687117,
|
| 112308 |
+
"grad_norm": 0.24024777114391327,
|
| 112309 |
+
"learning_rate": 3.3530064748360515e-08,
|
| 112310 |
+
"loss": 0.5002444386482239,
|
| 112311 |
+
"step": 16043
|
| 112312 |
+
},
|
| 112313 |
+
{
|
| 112314 |
+
"epoch": 19.68588957055215,
|
| 112315 |
+
"grad_norm": 0.24615056812763214,
|
| 112316 |
+
"learning_rate": 3.3270703048313147e-08,
|
| 112317 |
+
"loss": 0.5957982540130615,
|
| 112318 |
+
"step": 16044
|
| 112319 |
+
},
|
| 112320 |
+
{
|
| 112321 |
+
"epoch": 19.687116564417177,
|
| 112322 |
+
"grad_norm": 0.2776491343975067,
|
| 112323 |
+
"learning_rate": 3.3012347685915104e-08,
|
| 112324 |
+
"loss": 0.5292474031448364,
|
| 112325 |
+
"step": 16045
|
| 112326 |
+
},
|
| 112327 |
+
{
|
| 112328 |
+
"epoch": 19.68834355828221,
|
| 112329 |
+
"grad_norm": 0.2500789761543274,
|
| 112330 |
+
"learning_rate": 3.2754998671583046e-08,
|
| 112331 |
+
"loss": 0.6083974242210388,
|
| 112332 |
+
"step": 16046
|
| 112333 |
+
},
|
| 112334 |
+
{
|
| 112335 |
+
"epoch": 19.689570552147238,
|
| 112336 |
+
"grad_norm": 0.29520708322525024,
|
| 112337 |
+
"learning_rate": 3.249865601568647e-08,
|
| 112338 |
+
"loss": 0.7569975852966309,
|
| 112339 |
+
"step": 16047
|
| 112340 |
+
},
|
| 112341 |
+
{
|
| 112342 |
+
"epoch": 19.69079754601227,
|
| 112343 |
+
"grad_norm": 0.27156761288642883,
|
| 112344 |
+
"learning_rate": 3.224331972856154e-08,
|
| 112345 |
+
"loss": 0.773374080657959,
|
| 112346 |
+
"step": 16048
|
| 112347 |
+
},
|
| 112348 |
+
{
|
| 112349 |
+
"epoch": 19.692024539877302,
|
| 112350 |
+
"grad_norm": 0.277505487203598,
|
| 112351 |
+
"learning_rate": 3.198898982049725e-08,
|
| 112352 |
+
"loss": 0.7151801586151123,
|
| 112353 |
+
"step": 16049
|
| 112354 |
+
},
|
| 112355 |
+
{
|
| 112356 |
+
"epoch": 19.69325153374233,
|
| 112357 |
+
"grad_norm": 0.2487240731716156,
|
| 112358 |
+
"learning_rate": 3.1735666301746514e-08,
|
| 112359 |
+
"loss": 0.6857708692550659,
|
| 112360 |
+
"step": 16050
|
| 112361 |
+
},
|
| 112362 |
+
{
|
| 112363 |
+
"epoch": 19.694478527607362,
|
| 112364 |
+
"grad_norm": 0.2689577043056488,
|
| 112365 |
+
"learning_rate": 3.148334918251783e-08,
|
| 112366 |
+
"loss": 0.5517776608467102,
|
| 112367 |
+
"step": 16051
|
| 112368 |
+
},
|
| 112369 |
+
{
|
| 112370 |
+
"epoch": 19.69570552147239,
|
| 112371 |
+
"grad_norm": 0.28085383772850037,
|
| 112372 |
+
"learning_rate": 3.123203847298362e-08,
|
| 112373 |
+
"loss": 0.6142587065696716,
|
| 112374 |
+
"step": 16052
|
| 112375 |
+
},
|
| 112376 |
+
{
|
| 112377 |
+
"epoch": 19.696932515337423,
|
| 112378 |
+
"grad_norm": 0.2815287411212921,
|
| 112379 |
+
"learning_rate": 3.0981734183274657e-08,
|
| 112380 |
+
"loss": 0.8219236731529236,
|
| 112381 |
+
"step": 16053
|
| 112382 |
+
},
|
| 112383 |
+
{
|
| 112384 |
+
"epoch": 19.698159509202455,
|
| 112385 |
+
"grad_norm": 0.2464151233434677,
|
| 112386 |
+
"learning_rate": 3.073243632347456e-08,
|
| 112387 |
+
"loss": 0.5621984601020813,
|
| 112388 |
+
"step": 16054
|
| 112389 |
+
},
|
| 112390 |
+
{
|
| 112391 |
+
"epoch": 19.699386503067483,
|
| 112392 |
+
"grad_norm": 0.27578023076057434,
|
| 112393 |
+
"learning_rate": 3.0484144903639156e-08,
|
| 112394 |
+
"loss": 0.6351155638694763,
|
| 112395 |
+
"step": 16055
|
| 112396 |
+
},
|
| 112397 |
+
{
|
| 112398 |
+
"epoch": 19.700613496932515,
|
| 112399 |
+
"grad_norm": 0.27246275544166565,
|
| 112400 |
+
"learning_rate": 3.023685993376879e-08,
|
| 112401 |
+
"loss": 0.5692144632339478,
|
| 112402 |
+
"step": 16056
|
| 112403 |
+
},
|
| 112404 |
+
{
|
| 112405 |
+
"epoch": 19.701840490797547,
|
| 112406 |
+
"grad_norm": 0.29104340076446533,
|
| 112407 |
+
"learning_rate": 2.999058142383604e-08,
|
| 112408 |
+
"loss": 0.6804317831993103,
|
| 112409 |
+
"step": 16057
|
| 112410 |
+
},
|
| 112411 |
+
{
|
| 112412 |
+
"epoch": 19.703067484662576,
|
| 112413 |
+
"grad_norm": 0.2483447641134262,
|
| 112414 |
+
"learning_rate": 2.974530938376907e-08,
|
| 112415 |
+
"loss": 0.368174284696579,
|
| 112416 |
+
"step": 16058
|
| 112417 |
+
},
|
| 112418 |
+
{
|
| 112419 |
+
"epoch": 19.704294478527608,
|
| 112420 |
+
"grad_norm": 0.2847994565963745,
|
| 112421 |
+
"learning_rate": 2.950104382344887e-08,
|
| 112422 |
+
"loss": 0.6415199637413025,
|
| 112423 |
+
"step": 16059
|
| 112424 |
+
},
|
| 112425 |
+
{
|
| 112426 |
+
"epoch": 19.70552147239264,
|
| 112427 |
+
"grad_norm": 0.2714451849460602,
|
| 112428 |
+
"learning_rate": 2.9257784752723118e-08,
|
| 112429 |
+
"loss": 0.5773791074752808,
|
| 112430 |
+
"step": 16060
|
| 112431 |
+
},
|
| 112432 |
+
{
|
| 112433 |
+
"epoch": 19.706748466257668,
|
| 112434 |
+
"grad_norm": 0.2644261419773102,
|
| 112435 |
+
"learning_rate": 2.9015532181397853e-08,
|
| 112436 |
+
"loss": 0.6783016920089722,
|
| 112437 |
+
"step": 16061
|
| 112438 |
+
},
|
| 112439 |
+
{
|
| 112440 |
+
"epoch": 19.7079754601227,
|
| 112441 |
+
"grad_norm": 0.29063233733177185,
|
| 112442 |
+
"learning_rate": 2.8774286119234716e-08,
|
| 112443 |
+
"loss": 0.6394919157028198,
|
| 112444 |
+
"step": 16062
|
| 112445 |
+
},
|
| 112446 |
+
{
|
| 112447 |
+
"epoch": 19.70920245398773,
|
| 112448 |
+
"grad_norm": 0.24129268527030945,
|
| 112449 |
+
"learning_rate": 2.8534046575964812e-08,
|
| 112450 |
+
"loss": 0.5730476379394531,
|
| 112451 |
+
"step": 16063
|
| 112452 |
+
},
|
| 112453 |
+
{
|
| 112454 |
+
"epoch": 19.71042944785276,
|
| 112455 |
+
"grad_norm": 0.25690650939941406,
|
| 112456 |
+
"learning_rate": 2.8294813561263734e-08,
|
| 112457 |
+
"loss": 0.6591860055923462,
|
| 112458 |
+
"step": 16064
|
| 112459 |
+
},
|
| 112460 |
+
{
|
| 112461 |
+
"epoch": 19.711656441717793,
|
| 112462 |
+
"grad_norm": 0.27134397625923157,
|
| 112463 |
+
"learning_rate": 2.805658708477654e-08,
|
| 112464 |
+
"loss": 0.5581389665603638,
|
| 112465 |
+
"step": 16065
|
| 112466 |
+
},
|
| 112467 |
+
{
|
| 112468 |
+
"epoch": 19.71288343558282,
|
| 112469 |
+
"grad_norm": 0.28178083896636963,
|
| 112470 |
+
"learning_rate": 2.7819367156106667e-08,
|
| 112471 |
+
"loss": 0.6799309253692627,
|
| 112472 |
+
"step": 16066
|
| 112473 |
+
},
|
| 112474 |
+
{
|
| 112475 |
+
"epoch": 19.714110429447853,
|
| 112476 |
+
"grad_norm": 0.28005310893058777,
|
| 112477 |
+
"learning_rate": 2.7583153784815908e-08,
|
| 112478 |
+
"loss": 0.7239497900009155,
|
| 112479 |
+
"step": 16067
|
| 112480 |
+
},
|
| 112481 |
+
{
|
| 112482 |
+
"epoch": 19.715337423312885,
|
| 112483 |
+
"grad_norm": 0.27341434359550476,
|
| 112484 |
+
"learning_rate": 2.734794698042442e-08,
|
| 112485 |
+
"loss": 0.5189157724380493,
|
| 112486 |
+
"step": 16068
|
| 112487 |
+
},
|
| 112488 |
+
{
|
| 112489 |
+
"epoch": 19.716564417177914,
|
| 112490 |
+
"grad_norm": 0.2786354422569275,
|
| 112491 |
+
"learning_rate": 2.711374675241074e-08,
|
| 112492 |
+
"loss": 0.7233799695968628,
|
| 112493 |
+
"step": 16069
|
| 112494 |
+
},
|
| 112495 |
+
{
|
| 112496 |
+
"epoch": 19.717791411042946,
|
| 112497 |
+
"grad_norm": 0.27927231788635254,
|
| 112498 |
+
"learning_rate": 2.6880553110220087e-08,
|
| 112499 |
+
"loss": 0.6193320751190186,
|
| 112500 |
+
"step": 16070
|
| 112501 |
+
},
|
| 112502 |
+
{
|
| 112503 |
+
"epoch": 19.719018404907974,
|
| 112504 |
+
"grad_norm": 0.2907028794288635,
|
| 112505 |
+
"learning_rate": 2.664836606324772e-08,
|
| 112506 |
+
"loss": 0.6454459428787231,
|
| 112507 |
+
"step": 16071
|
| 112508 |
+
},
|
| 112509 |
+
{
|
| 112510 |
+
"epoch": 19.720245398773006,
|
| 112511 |
+
"grad_norm": 0.32196488976478577,
|
| 112512 |
+
"learning_rate": 2.6417185620852823e-08,
|
| 112513 |
+
"loss": 0.5972232818603516,
|
| 112514 |
+
"step": 16072
|
| 112515 |
+
},
|
| 112516 |
+
{
|
| 112517 |
+
"epoch": 19.721472392638038,
|
| 112518 |
+
"grad_norm": 0.2817733585834503,
|
| 112519 |
+
"learning_rate": 2.618701179235572e-08,
|
| 112520 |
+
"loss": 0.5001744031906128,
|
| 112521 |
+
"step": 16073
|
| 112522 |
+
},
|
| 112523 |
+
{
|
| 112524 |
+
"epoch": 19.722699386503066,
|
| 112525 |
+
"grad_norm": 0.25485125184059143,
|
| 112526 |
+
"learning_rate": 2.595784458703232e-08,
|
| 112527 |
+
"loss": 0.5342998504638672,
|
| 112528 |
+
"step": 16074
|
| 112529 |
+
},
|
| 112530 |
+
{
|
| 112531 |
+
"epoch": 19.7239263803681,
|
| 112532 |
+
"grad_norm": 0.2412424385547638,
|
| 112533 |
+
"learning_rate": 2.5729684014119683e-08,
|
| 112534 |
+
"loss": 0.4952559471130371,
|
| 112535 |
+
"step": 16075
|
| 112536 |
+
},
|
| 112537 |
+
{
|
| 112538 |
+
"epoch": 19.72515337423313,
|
| 112539 |
+
"grad_norm": 0.3065892159938812,
|
| 112540 |
+
"learning_rate": 2.5502530082813226e-08,
|
| 112541 |
+
"loss": 0.6449559330940247,
|
| 112542 |
+
"step": 16076
|
| 112543 |
+
},
|
| 112544 |
+
{
|
| 112545 |
+
"epoch": 19.72638036809816,
|
| 112546 |
+
"grad_norm": 0.29340454936027527,
|
| 112547 |
+
"learning_rate": 2.5276382802272292e-08,
|
| 112548 |
+
"loss": 0.5114918351173401,
|
| 112549 |
+
"step": 16077
|
| 112550 |
+
},
|
| 112551 |
+
{
|
| 112552 |
+
"epoch": 19.72760736196319,
|
| 112553 |
+
"grad_norm": 0.26721706986427307,
|
| 112554 |
+
"learning_rate": 2.5051242181609037e-08,
|
| 112555 |
+
"loss": 0.4897948205471039,
|
| 112556 |
+
"step": 16078
|
| 112557 |
+
},
|
| 112558 |
+
{
|
| 112559 |
+
"epoch": 19.72883435582822,
|
| 112560 |
+
"grad_norm": 0.2524581849575043,
|
| 112561 |
+
"learning_rate": 2.482710822989953e-08,
|
| 112562 |
+
"loss": 0.5690096616744995,
|
| 112563 |
+
"step": 16079
|
| 112564 |
+
},
|
| 112565 |
+
{
|
| 112566 |
+
"epoch": 19.73006134969325,
|
| 112567 |
+
"grad_norm": 0.2821669578552246,
|
| 112568 |
+
"learning_rate": 2.4603980956178218e-08,
|
| 112569 |
+
"loss": 0.8166283965110779,
|
| 112570 |
+
"step": 16080
|
| 112571 |
+
},
|
| 112572 |
+
{
|
| 112573 |
+
"epoch": 19.731288343558283,
|
| 112574 |
+
"grad_norm": 0.2750683128833771,
|
| 112575 |
+
"learning_rate": 2.4381860369437905e-08,
|
| 112576 |
+
"loss": 0.5883926153182983,
|
| 112577 |
+
"step": 16081
|
| 112578 |
+
},
|
| 112579 |
+
{
|
| 112580 |
+
"epoch": 19.73251533742331,
|
| 112581 |
+
"grad_norm": 0.29478567838668823,
|
| 112582 |
+
"learning_rate": 2.4160746478632536e-08,
|
| 112583 |
+
"loss": 0.6610470414161682,
|
| 112584 |
+
"step": 16082
|
| 112585 |
+
},
|
| 112586 |
+
{
|
| 112587 |
+
"epoch": 19.733742331288344,
|
| 112588 |
+
"grad_norm": 0.26517975330352783,
|
| 112589 |
+
"learning_rate": 2.3940639292674427e-08,
|
| 112590 |
+
"loss": 0.7135617136955261,
|
| 112591 |
+
"step": 16083
|
| 112592 |
+
},
|
| 112593 |
+
{
|
| 112594 |
+
"epoch": 19.734969325153376,
|
| 112595 |
+
"grad_norm": 0.23761983215808868,
|
| 112596 |
+
"learning_rate": 2.3721538820434264e-08,
|
| 112597 |
+
"loss": 0.4073242247104645,
|
| 112598 |
+
"step": 16084
|
| 112599 |
+
},
|
| 112600 |
+
{
|
| 112601 |
+
"epoch": 19.736196319018404,
|
| 112602 |
+
"grad_norm": 0.27607834339141846,
|
| 112603 |
+
"learning_rate": 2.3503445070746645e-08,
|
| 112604 |
+
"loss": 0.569398820400238,
|
| 112605 |
+
"step": 16085
|
| 112606 |
+
},
|
| 112607 |
+
{
|
| 112608 |
+
"epoch": 19.737423312883436,
|
| 112609 |
+
"grad_norm": 0.25899583101272583,
|
| 112610 |
+
"learning_rate": 2.3286358052398983e-08,
|
| 112611 |
+
"loss": 0.5767850875854492,
|
| 112612 |
+
"step": 16086
|
| 112613 |
+
},
|
| 112614 |
+
{
|
| 112615 |
+
"epoch": 19.738650306748465,
|
| 112616 |
+
"grad_norm": 0.30660712718963623,
|
| 112617 |
+
"learning_rate": 2.3070277774139836e-08,
|
| 112618 |
+
"loss": 0.6670733094215393,
|
| 112619 |
+
"step": 16087
|
| 112620 |
+
},
|
| 112621 |
+
{
|
| 112622 |
+
"epoch": 19.739877300613497,
|
| 112623 |
+
"grad_norm": 0.24769040942192078,
|
| 112624 |
+
"learning_rate": 2.285520424468446e-08,
|
| 112625 |
+
"loss": 0.5839700698852539,
|
| 112626 |
+
"step": 16088
|
| 112627 |
+
},
|
| 112628 |
+
{
|
| 112629 |
+
"epoch": 19.74110429447853,
|
| 112630 |
+
"grad_norm": 0.29058071970939636,
|
| 112631 |
+
"learning_rate": 2.2641137472698136e-08,
|
| 112632 |
+
"loss": 0.5117849707603455,
|
| 112633 |
+
"step": 16089
|
| 112634 |
+
},
|
| 112635 |
+
{
|
| 112636 |
+
"epoch": 19.742331288343557,
|
| 112637 |
+
"grad_norm": 0.2758978307247162,
|
| 112638 |
+
"learning_rate": 2.2428077466807307e-08,
|
| 112639 |
+
"loss": 0.612994909286499,
|
| 112640 |
+
"step": 16090
|
| 112641 |
+
},
|
| 112642 |
+
{
|
| 112643 |
+
"epoch": 19.74355828220859,
|
| 112644 |
+
"grad_norm": 0.2502864599227905,
|
| 112645 |
+
"learning_rate": 2.2216024235605092e-08,
|
| 112646 |
+
"loss": 0.49561208486557007,
|
| 112647 |
+
"step": 16091
|
| 112648 |
+
},
|
| 112649 |
+
{
|
| 112650 |
+
"epoch": 19.74478527607362,
|
| 112651 |
+
"grad_norm": 0.2722310423851013,
|
| 112652 |
+
"learning_rate": 2.2004977787634663e-08,
|
| 112653 |
+
"loss": 0.46982502937316895,
|
| 112654 |
+
"step": 16092
|
| 112655 |
+
},
|
| 112656 |
+
{
|
| 112657 |
+
"epoch": 19.74601226993865,
|
| 112658 |
+
"grad_norm": 0.3121661841869354,
|
| 112659 |
+
"learning_rate": 2.1794938131405873e-08,
|
| 112660 |
+
"loss": 0.6763758063316345,
|
| 112661 |
+
"step": 16093
|
| 112662 |
+
},
|
| 112663 |
+
{
|
| 112664 |
+
"epoch": 19.74723926380368,
|
| 112665 |
+
"grad_norm": 0.2213575690984726,
|
| 112666 |
+
"learning_rate": 2.15859052753814e-08,
|
| 112667 |
+
"loss": 0.5490544438362122,
|
| 112668 |
+
"step": 16094
|
| 112669 |
+
},
|
| 112670 |
+
{
|
| 112671 |
+
"epoch": 19.74846625766871,
|
| 112672 |
+
"grad_norm": 0.23200438916683197,
|
| 112673 |
+
"learning_rate": 2.137787922798784e-08,
|
| 112674 |
+
"loss": 0.5354525446891785,
|
| 112675 |
+
"step": 16095
|
| 112676 |
+
},
|
| 112677 |
+
{
|
| 112678 |
+
"epoch": 19.749693251533742,
|
| 112679 |
+
"grad_norm": 0.26940786838531494,
|
| 112680 |
+
"learning_rate": 2.1170859997612925e-08,
|
| 112681 |
+
"loss": 0.5801250338554382,
|
| 112682 |
+
"step": 16096
|
| 112683 |
+
},
|
| 112684 |
+
{
|
| 112685 |
+
"epoch": 19.750920245398774,
|
| 112686 |
+
"grad_norm": 0.25600069761276245,
|
| 112687 |
+
"learning_rate": 2.0964847592597204e-08,
|
| 112688 |
+
"loss": 0.5844970345497131,
|
| 112689 |
+
"step": 16097
|
| 112690 |
+
},
|
| 112691 |
+
{
|
| 112692 |
+
"epoch": 19.752147239263802,
|
| 112693 |
+
"grad_norm": 0.2807966470718384,
|
| 112694 |
+
"learning_rate": 2.0759842021247922e-08,
|
| 112695 |
+
"loss": 0.5886245369911194,
|
| 112696 |
+
"step": 16098
|
| 112697 |
+
},
|
| 112698 |
+
{
|
| 112699 |
+
"epoch": 19.753374233128834,
|
| 112700 |
+
"grad_norm": 0.3036273717880249,
|
| 112701 |
+
"learning_rate": 2.055584329182514e-08,
|
| 112702 |
+
"loss": 0.41089171171188354,
|
| 112703 |
+
"step": 16099
|
| 112704 |
+
},
|
| 112705 |
+
{
|
| 112706 |
+
"epoch": 19.754601226993866,
|
| 112707 |
+
"grad_norm": 0.2875816524028778,
|
| 112708 |
+
"learning_rate": 2.0352851412552832e-08,
|
| 112709 |
+
"loss": 0.6999335289001465,
|
| 112710 |
+
"step": 16100
|
| 112711 |
+
},
|
| 112712 |
+
{
|
| 112713 |
+
"epoch": 19.755828220858895,
|
| 112714 |
+
"grad_norm": 0.2529551684856415,
|
| 112715 |
+
"learning_rate": 2.0150866391613345e-08,
|
| 112716 |
+
"loss": 0.6482492685317993,
|
| 112717 |
+
"step": 16101
|
| 112718 |
+
},
|
| 112719 |
+
{
|
| 112720 |
+
"epoch": 19.757055214723927,
|
| 112721 |
+
"grad_norm": 0.25869661569595337,
|
| 112722 |
+
"learning_rate": 1.9949888237147385e-08,
|
| 112723 |
+
"loss": 0.5927771329879761,
|
| 112724 |
+
"step": 16102
|
| 112725 |
+
},
|
| 112726 |
+
{
|
| 112727 |
+
"epoch": 19.758282208588955,
|
| 112728 |
+
"grad_norm": 0.2676050662994385,
|
| 112729 |
+
"learning_rate": 1.9749916957254034e-08,
|
| 112730 |
+
"loss": 0.4569295346736908,
|
| 112731 |
+
"step": 16103
|
| 112732 |
+
},
|
| 112733 |
+
{
|
| 112734 |
+
"epoch": 19.759509202453987,
|
| 112735 |
+
"grad_norm": 0.24169577658176422,
|
| 112736 |
+
"learning_rate": 1.9550952559996284e-08,
|
| 112737 |
+
"loss": 0.4544190764427185,
|
| 112738 |
+
"step": 16104
|
| 112739 |
+
},
|
| 112740 |
+
{
|
| 112741 |
+
"epoch": 19.76073619631902,
|
| 112742 |
+
"grad_norm": 0.2864413857460022,
|
| 112743 |
+
"learning_rate": 1.9352995053395494e-08,
|
| 112744 |
+
"loss": 0.6269335746765137,
|
| 112745 |
+
"step": 16105
|
| 112746 |
+
},
|
| 112747 |
+
{
|
| 112748 |
+
"epoch": 19.761963190184048,
|
| 112749 |
+
"grad_norm": 0.2390204519033432,
|
| 112750 |
+
"learning_rate": 1.9156044445428624e-08,
|
| 112751 |
+
"loss": 0.5719627141952515,
|
| 112752 |
+
"step": 16106
|
| 112753 |
+
},
|
| 112754 |
+
{
|
| 112755 |
+
"epoch": 19.76319018404908,
|
| 112756 |
+
"grad_norm": 0.24103528261184692,
|
| 112757 |
+
"learning_rate": 1.8960100744030983e-08,
|
| 112758 |
+
"loss": 0.5354752540588379,
|
| 112759 |
+
"step": 16107
|
| 112760 |
+
},
|
| 112761 |
+
{
|
| 112762 |
+
"epoch": 19.764417177914112,
|
| 112763 |
+
"grad_norm": 0.28023040294647217,
|
| 112764 |
+
"learning_rate": 1.8765163957104593e-08,
|
| 112765 |
+
"loss": 0.6185349225997925,
|
| 112766 |
+
"step": 16108
|
| 112767 |
+
},
|
| 112768 |
+
{
|
| 112769 |
+
"epoch": 19.76564417177914,
|
| 112770 |
+
"grad_norm": 0.26762810349464417,
|
| 112771 |
+
"learning_rate": 1.8571234092507052e-08,
|
| 112772 |
+
"loss": 0.6955153942108154,
|
| 112773 |
+
"step": 16109
|
| 112774 |
+
},
|
| 112775 |
+
{
|
| 112776 |
+
"epoch": 19.766871165644172,
|
| 112777 |
+
"grad_norm": 0.26790452003479004,
|
| 112778 |
+
"learning_rate": 1.8378311158051554e-08,
|
| 112779 |
+
"loss": 0.6795819997787476,
|
| 112780 |
+
"step": 16110
|
| 112781 |
+
},
|
| 112782 |
+
{
|
| 112783 |
+
"epoch": 19.7680981595092,
|
| 112784 |
+
"grad_norm": 0.27976611256599426,
|
| 112785 |
+
"learning_rate": 1.8186395161520764e-08,
|
| 112786 |
+
"loss": 0.6870114207267761,
|
| 112787 |
+
"step": 16111
|
| 112788 |
+
},
|
| 112789 |
+
{
|
| 112790 |
+
"epoch": 19.769325153374233,
|
| 112791 |
+
"grad_norm": 0.3037409484386444,
|
| 112792 |
+
"learning_rate": 1.7995486110641835e-08,
|
| 112793 |
+
"loss": 0.5843106508255005,
|
| 112794 |
+
"step": 16112
|
| 112795 |
+
},
|
| 112796 |
+
{
|
| 112797 |
+
"epoch": 19.770552147239265,
|
| 112798 |
+
"grad_norm": 0.29687419533729553,
|
| 112799 |
+
"learning_rate": 1.780558401311416e-08,
|
| 112800 |
+
"loss": 0.581790566444397,
|
| 112801 |
+
"step": 16113
|
| 112802 |
+
},
|
| 112803 |
+
{
|
| 112804 |
+
"epoch": 19.771779141104293,
|
| 112805 |
+
"grad_norm": 0.24483460187911987,
|
| 112806 |
+
"learning_rate": 1.7616688876592734e-08,
|
| 112807 |
+
"loss": 0.567284107208252,
|
| 112808 |
+
"step": 16114
|
| 112809 |
+
},
|
| 112810 |
+
{
|
| 112811 |
+
"epoch": 19.773006134969325,
|
| 112812 |
+
"grad_norm": 0.25496798753738403,
|
| 112813 |
+
"learning_rate": 1.7428800708693683e-08,
|
| 112814 |
+
"loss": 0.6368364095687866,
|
| 112815 |
+
"step": 16115
|
| 112816 |
+
},
|
| 112817 |
+
{
|
| 112818 |
+
"epoch": 19.774233128834357,
|
| 112819 |
+
"grad_norm": 0.25047779083251953,
|
| 112820 |
+
"learning_rate": 1.7241919516983175e-08,
|
| 112821 |
+
"loss": 0.5190349817276001,
|
| 112822 |
+
"step": 16116
|
| 112823 |
+
},
|
| 112824 |
+
{
|
| 112825 |
+
"epoch": 19.775460122699386,
|
| 112826 |
+
"grad_norm": 0.28317609429359436,
|
| 112827 |
+
"learning_rate": 1.7056045308999623e-08,
|
| 112828 |
+
"loss": 0.601601243019104,
|
| 112829 |
+
"step": 16117
|
| 112830 |
+
},
|
| 112831 |
+
{
|
| 112832 |
+
"epoch": 19.776687116564418,
|
| 112833 |
+
"grad_norm": 0.2964201867580414,
|
| 112834 |
+
"learning_rate": 1.687117809223149e-08,
|
| 112835 |
+
"loss": 0.6052480340003967,
|
| 112836 |
+
"step": 16118
|
| 112837 |
+
},
|
| 112838 |
+
{
|
| 112839 |
+
"epoch": 19.77791411042945,
|
| 112840 |
+
"grad_norm": 0.2601813077926636,
|
| 112841 |
+
"learning_rate": 1.6687317874133913e-08,
|
| 112842 |
+
"loss": 0.5624167919158936,
|
| 112843 |
+
"step": 16119
|
| 112844 |
+
},
|
| 112845 |
+
{
|
| 112846 |
+
"epoch": 19.779141104294478,
|
| 112847 |
+
"grad_norm": 0.2339327335357666,
|
| 112848 |
+
"learning_rate": 1.650446466211486e-08,
|
| 112849 |
+
"loss": 0.49464109539985657,
|
| 112850 |
+
"step": 16120
|
| 112851 |
+
},
|
| 112852 |
+
{
|
| 112853 |
+
"epoch": 19.78036809815951,
|
| 112854 |
+
"grad_norm": 0.2931138873100281,
|
| 112855 |
+
"learning_rate": 1.6322618463546212e-08,
|
| 112856 |
+
"loss": 0.7361763715744019,
|
| 112857 |
+
"step": 16121
|
| 112858 |
+
},
|
| 112859 |
+
{
|
| 112860 |
+
"epoch": 19.78159509202454,
|
| 112861 |
+
"grad_norm": 0.28705254197120667,
|
| 112862 |
+
"learning_rate": 1.6141779285758217e-08,
|
| 112863 |
+
"loss": 0.6358294486999512,
|
| 112864 |
+
"step": 16122
|
| 112865 |
+
},
|
| 112866 |
+
{
|
| 112867 |
+
"epoch": 19.78282208588957,
|
| 112868 |
+
"grad_norm": 0.25853028893470764,
|
| 112869 |
+
"learning_rate": 1.5961947136036715e-08,
|
| 112870 |
+
"loss": 0.5759102702140808,
|
| 112871 |
+
"step": 16123
|
| 112872 |
+
},
|
| 112873 |
+
{
|
| 112874 |
+
"epoch": 19.784049079754602,
|
| 112875 |
+
"grad_norm": 0.2429991215467453,
|
| 112876 |
+
"learning_rate": 1.5783122021634233e-08,
|
| 112877 |
+
"loss": 0.5402126312255859,
|
| 112878 |
+
"step": 16124
|
| 112879 |
+
},
|
| 112880 |
+
{
|
| 112881 |
+
"epoch": 19.78527607361963,
|
| 112882 |
+
"grad_norm": 0.2798876464366913,
|
| 112883 |
+
"learning_rate": 1.5605303949756124e-08,
|
| 112884 |
+
"loss": 0.5620956420898438,
|
| 112885 |
+
"step": 16125
|
| 112886 |
+
},
|
| 112887 |
+
{
|
| 112888 |
+
"epoch": 19.786503067484663,
|
| 112889 |
+
"grad_norm": 0.26559048891067505,
|
| 112890 |
+
"learning_rate": 1.5428492927571648e-08,
|
| 112891 |
+
"loss": 0.4874908924102783,
|
| 112892 |
+
"step": 16126
|
| 112893 |
+
},
|
| 112894 |
+
{
|
| 112895 |
+
"epoch": 19.787730061349695,
|
| 112896 |
+
"grad_norm": 0.24992987513542175,
|
| 112897 |
+
"learning_rate": 1.5252688962202886e-08,
|
| 112898 |
+
"loss": 0.5381543040275574,
|
| 112899 |
+
"step": 16127
|
| 112900 |
+
},
|
| 112901 |
+
{
|
| 112902 |
+
"epoch": 19.788957055214723,
|
| 112903 |
+
"grad_norm": 0.26071953773498535,
|
| 112904 |
+
"learning_rate": 1.5077892060741394e-08,
|
| 112905 |
+
"loss": 0.5717687010765076,
|
| 112906 |
+
"step": 16128
|
| 112907 |
+
},
|
| 112908 |
+
{
|
| 112909 |
+
"epoch": 19.790184049079755,
|
| 112910 |
+
"grad_norm": 0.23289570212364197,
|
| 112911 |
+
"learning_rate": 1.490410223023153e-08,
|
| 112912 |
+
"loss": 0.5691318511962891,
|
| 112913 |
+
"step": 16129
|
| 112914 |
+
},
|
| 112915 |
+
{
|
| 112916 |
+
"epoch": 19.791411042944784,
|
| 112917 |
+
"grad_norm": 0.3143908679485321,
|
| 112918 |
+
"learning_rate": 1.4731319477678806e-08,
|
| 112919 |
+
"loss": 0.7563022375106812,
|
| 112920 |
+
"step": 16130
|
| 112921 |
+
},
|
| 112922 |
+
{
|
| 112923 |
+
"epoch": 19.792638036809816,
|
| 112924 |
+
"grad_norm": 0.2691217362880707,
|
| 112925 |
+
"learning_rate": 1.4559543810044318e-08,
|
| 112926 |
+
"loss": 0.5809985995292664,
|
| 112927 |
+
"step": 16131
|
| 112928 |
+
},
|
| 112929 |
+
{
|
| 112930 |
+
"epoch": 19.793865030674848,
|
| 112931 |
+
"grad_norm": 0.2620144784450531,
|
| 112932 |
+
"learning_rate": 1.4388775234255857e-08,
|
| 112933 |
+
"loss": 0.6333280205726624,
|
| 112934 |
+
"step": 16132
|
| 112935 |
+
},
|
| 112936 |
+
{
|
| 112937 |
+
"epoch": 19.795092024539876,
|
| 112938 |
+
"grad_norm": 0.31869399547576904,
|
| 112939 |
+
"learning_rate": 1.4219013757191257e-08,
|
| 112940 |
+
"loss": 0.47401362657546997,
|
| 112941 |
+
"step": 16133
|
| 112942 |
+
},
|
| 112943 |
+
{
|
| 112944 |
+
"epoch": 19.79631901840491,
|
| 112945 |
+
"grad_norm": 0.2688331604003906,
|
| 112946 |
+
"learning_rate": 1.4050259385700593e-08,
|
| 112947 |
+
"loss": 0.725259006023407,
|
| 112948 |
+
"step": 16134
|
| 112949 |
+
},
|
| 112950 |
+
{
|
| 112951 |
+
"epoch": 19.79754601226994,
|
| 112952 |
+
"grad_norm": 0.27281084656715393,
|
| 112953 |
+
"learning_rate": 1.3882512126578429e-08,
|
| 112954 |
+
"loss": 0.8627501726150513,
|
| 112955 |
+
"step": 16135
|
| 112956 |
+
},
|
| 112957 |
+
{
|
| 112958 |
+
"epoch": 19.79877300613497,
|
| 112959 |
+
"grad_norm": 0.24760672450065613,
|
| 112960 |
+
"learning_rate": 1.3715771986591575e-08,
|
| 112961 |
+
"loss": 0.5549024343490601,
|
| 112962 |
+
"step": 16136
|
| 112963 |
+
},
|
| 112964 |
+
{
|
| 112965 |
+
"epoch": 19.8,
|
| 112966 |
+
"grad_norm": 0.2531871497631073,
|
| 112967 |
+
"learning_rate": 1.355003897245688e-08,
|
| 112968 |
+
"loss": 0.4592863619327545,
|
| 112969 |
+
"step": 16137
|
| 112970 |
+
},
|
| 112971 |
+
{
|
| 112972 |
+
"epoch": 19.80122699386503,
|
| 112973 |
+
"grad_norm": 0.2760011553764343,
|
| 112974 |
+
"learning_rate": 1.3385313090857887e-08,
|
| 112975 |
+
"loss": 0.5727841258049011,
|
| 112976 |
+
"step": 16138
|
| 112977 |
+
},
|
| 112978 |
+
{
|
| 112979 |
+
"epoch": 19.80245398773006,
|
| 112980 |
+
"grad_norm": 0.2407931238412857,
|
| 112981 |
+
"learning_rate": 1.322159434843373e-08,
|
| 112982 |
+
"loss": 0.2982398271560669,
|
| 112983 |
+
"step": 16139
|
| 112984 |
+
},
|
| 112985 |
+
{
|
| 112986 |
+
"epoch": 19.803680981595093,
|
| 112987 |
+
"grad_norm": 0.26291385293006897,
|
| 112988 |
+
"learning_rate": 1.3058882751781909e-08,
|
| 112989 |
+
"loss": 0.7060896158218384,
|
| 112990 |
+
"step": 16140
|
| 112991 |
+
},
|
| 112992 |
+
{
|
| 112993 |
+
"epoch": 19.80490797546012,
|
| 112994 |
+
"grad_norm": 0.27488279342651367,
|
| 112995 |
+
"learning_rate": 1.2897178307461067e-08,
|
| 112996 |
+
"loss": 0.44591644406318665,
|
| 112997 |
+
"step": 16141
|
| 112998 |
+
},
|
| 112999 |
+
{
|
| 113000 |
+
"epoch": 19.806134969325154,
|
| 113001 |
+
"grad_norm": 0.2684621810913086,
|
| 113002 |
+
"learning_rate": 1.2736481021990987e-08,
|
| 113003 |
+
"loss": 0.5543627142906189,
|
| 113004 |
+
"step": 16142
|
| 113005 |
+
},
|
| 113006 |
+
{
|
| 113007 |
+
"epoch": 19.807361963190186,
|
| 113008 |
+
"grad_norm": 0.26200249791145325,
|
| 113009 |
+
"learning_rate": 1.257679090184427e-08,
|
| 113010 |
+
"loss": 0.5632297396659851,
|
| 113011 |
+
"step": 16143
|
| 113012 |
+
},
|
| 113013 |
+
{
|
| 113014 |
+
"epoch": 19.808588957055214,
|
| 113015 |
+
"grad_norm": 0.34392958879470825,
|
| 113016 |
+
"learning_rate": 1.2418107953462988e-08,
|
| 113017 |
+
"loss": 0.5739174485206604,
|
| 113018 |
+
"step": 16144
|
| 113019 |
+
},
|
| 113020 |
+
{
|
| 113021 |
+
"epoch": 19.809815950920246,
|
| 113022 |
+
"grad_norm": 0.27134230732917786,
|
| 113023 |
+
"learning_rate": 1.2260432183242021e-08,
|
| 113024 |
+
"loss": 0.5653071403503418,
|
| 113025 |
+
"step": 16145
|
| 113026 |
+
},
|
| 113027 |
+
{
|
| 113028 |
+
"epoch": 19.811042944785274,
|
| 113029 |
+
"grad_norm": 0.2700212299823761,
|
| 113030 |
+
"learning_rate": 1.2103763597534622e-08,
|
| 113031 |
+
"loss": 0.6311249136924744,
|
| 113032 |
+
"step": 16146
|
| 113033 |
+
},
|
| 113034 |
+
{
|
| 113035 |
+
"epoch": 19.812269938650306,
|
| 113036 |
+
"grad_norm": 0.3041534423828125,
|
| 113037 |
+
"learning_rate": 1.1948102202655187e-08,
|
| 113038 |
+
"loss": 0.553008496761322,
|
| 113039 |
+
"step": 16147
|
| 113040 |
+
},
|
| 113041 |
+
{
|
| 113042 |
+
"epoch": 19.81349693251534,
|
| 113043 |
+
"grad_norm": 0.2412063181400299,
|
| 113044 |
+
"learning_rate": 1.1793448004882024e-08,
|
| 113045 |
+
"loss": 0.34542107582092285,
|
| 113046 |
+
"step": 16148
|
| 113047 |
+
},
|
| 113048 |
+
{
|
| 113049 |
+
"epoch": 19.814723926380367,
|
| 113050 |
+
"grad_norm": 0.29806700348854065,
|
| 113051 |
+
"learning_rate": 1.163980101044626e-08,
|
| 113052 |
+
"loss": 0.6542364358901978,
|
| 113053 |
+
"step": 16149
|
| 113054 |
+
},
|
| 113055 |
+
{
|
| 113056 |
+
"epoch": 19.8159509202454,
|
| 113057 |
+
"grad_norm": 0.2696397006511688,
|
| 113058 |
+
"learning_rate": 1.1487161225540167e-08,
|
| 113059 |
+
"loss": 0.6095598936080933,
|
| 113060 |
+
"step": 16150
|
| 113061 |
+
},
|
| 113062 |
+
{
|
| 113063 |
+
"epoch": 19.81717791411043,
|
| 113064 |
+
"grad_norm": 0.24587808549404144,
|
| 113065 |
+
"learning_rate": 1.1335528656317151e-08,
|
| 113066 |
+
"loss": 0.5262739658355713,
|
| 113067 |
+
"step": 16151
|
| 113068 |
+
},
|
| 113069 |
+
{
|
| 113070 |
+
"epoch": 19.81840490797546,
|
| 113071 |
+
"grad_norm": 0.2562694251537323,
|
| 113072 |
+
"learning_rate": 1.1184903308888995e-08,
|
| 113073 |
+
"loss": 0.7235836982727051,
|
| 113074 |
+
"step": 16152
|
| 113075 |
+
},
|
| 113076 |
+
{
|
| 113077 |
+
"epoch": 19.81963190184049,
|
| 113078 |
+
"grad_norm": 0.26316675543785095,
|
| 113079 |
+
"learning_rate": 1.1035285189325839e-08,
|
| 113080 |
+
"loss": 0.685309886932373,
|
| 113081 |
+
"step": 16153
|
| 113082 |
+
},
|
| 113083 |
+
{
|
| 113084 |
+
"epoch": 19.82085889570552,
|
| 113085 |
+
"grad_norm": 0.23587004840373993,
|
| 113086 |
+
"learning_rate": 1.0886674303661749e-08,
|
| 113087 |
+
"loss": 0.4574720859527588,
|
| 113088 |
+
"step": 16154
|
| 113089 |
+
},
|
| 113090 |
+
{
|
| 113091 |
+
"epoch": 19.822085889570552,
|
| 113092 |
+
"grad_norm": 0.2571620047092438,
|
| 113093 |
+
"learning_rate": 1.0739070657883598e-08,
|
| 113094 |
+
"loss": 0.5842510461807251,
|
| 113095 |
+
"step": 16155
|
| 113096 |
+
},
|
| 113097 |
+
{
|
| 113098 |
+
"epoch": 19.823312883435584,
|
| 113099 |
+
"grad_norm": 0.29056602716445923,
|
| 113100 |
+
"learning_rate": 1.0592474257942186e-08,
|
| 113101 |
+
"loss": 0.657039999961853,
|
| 113102 |
+
"step": 16156
|
| 113103 |
+
},
|
| 113104 |
+
{
|
| 113105 |
+
"epoch": 19.824539877300612,
|
| 113106 |
+
"grad_norm": 0.2734782099723816,
|
| 113107 |
+
"learning_rate": 1.0446885109746673e-08,
|
| 113108 |
+
"loss": 0.5873494148254395,
|
| 113109 |
+
"step": 16157
|
| 113110 |
+
},
|
| 113111 |
+
{
|
| 113112 |
+
"epoch": 19.825766871165644,
|
| 113113 |
+
"grad_norm": 0.30578967928886414,
|
| 113114 |
+
"learning_rate": 1.0302303219161813e-08,
|
| 113115 |
+
"loss": 0.6396012306213379,
|
| 113116 |
+
"step": 16158
|
| 113117 |
+
},
|
| 113118 |
+
{
|
| 113119 |
+
"epoch": 19.826993865030676,
|
| 113120 |
+
"grad_norm": 0.2994247078895569,
|
| 113121 |
+
"learning_rate": 1.0158728592021826e-08,
|
| 113122 |
+
"loss": 0.4040101170539856,
|
| 113123 |
+
"step": 16159
|
| 113124 |
+
},
|
| 113125 |
+
{
|
| 113126 |
+
"epoch": 19.828220858895705,
|
| 113127 |
+
"grad_norm": 0.29394763708114624,
|
| 113128 |
+
"learning_rate": 1.00161612341082e-08,
|
| 113129 |
+
"loss": 0.5635397434234619,
|
| 113130 |
+
"step": 16160
|
| 113131 |
+
},
|
| 113132 |
+
{
|
| 113133 |
+
"epoch": 19.829447852760737,
|
| 113134 |
+
"grad_norm": 0.2679688036441803,
|
| 113135 |
+
"learning_rate": 9.874601151171892e-09,
|
| 113136 |
+
"loss": 0.5635303854942322,
|
| 113137 |
+
"step": 16161
|
| 113138 |
+
},
|
| 113139 |
+
{
|
| 113140 |
+
"epoch": 19.830674846625765,
|
| 113141 |
+
"grad_norm": 0.25002577900886536,
|
| 113142 |
+
"learning_rate": 9.734048348913893e-09,
|
| 113143 |
+
"loss": 0.40179643034935,
|
| 113144 |
+
"step": 16162
|
| 113145 |
+
},
|
| 113146 |
+
{
|
| 113147 |
+
"epoch": 19.831901840490797,
|
| 113148 |
+
"grad_norm": 0.23891015350818634,
|
| 113149 |
+
"learning_rate": 9.594502833001895e-09,
|
| 113150 |
+
"loss": 0.5011698603630066,
|
| 113151 |
+
"step": 16163
|
| 113152 |
+
},
|
| 113153 |
+
{
|
| 113154 |
+
"epoch": 19.83312883435583,
|
| 113155 |
+
"grad_norm": 0.2804563641548157,
|
| 113156 |
+
"learning_rate": 9.455964609061951e-09,
|
| 113157 |
+
"loss": 0.3717347979545593,
|
| 113158 |
+
"step": 16164
|
| 113159 |
+
},
|
| 113160 |
+
{
|
| 113161 |
+
"epoch": 19.834355828220858,
|
| 113162 |
+
"grad_norm": 0.28568193316459656,
|
| 113163 |
+
"learning_rate": 9.318433682678484e-09,
|
| 113164 |
+
"loss": 0.7565097808837891,
|
| 113165 |
+
"step": 16165
|
| 113166 |
+
},
|
| 113167 |
+
{
|
| 113168 |
+
"epoch": 19.83558282208589,
|
| 113169 |
+
"grad_norm": 0.24735027551651,
|
| 113170 |
+
"learning_rate": 9.181910059388733e-09,
|
| 113171 |
+
"loss": 0.5839478969573975,
|
| 113172 |
+
"step": 16166
|
| 113173 |
+
},
|
| 113174 |
+
{
|
| 113175 |
+
"epoch": 19.83680981595092,
|
| 113176 |
+
"grad_norm": 0.2715287208557129,
|
| 113177 |
+
"learning_rate": 9.046393744702176e-09,
|
| 113178 |
+
"loss": 0.647666335105896,
|
| 113179 |
+
"step": 16167
|
| 113180 |
+
},
|
| 113181 |
+
{
|
| 113182 |
+
"epoch": 19.83803680981595,
|
| 113183 |
+
"grad_norm": 0.268315851688385,
|
| 113184 |
+
"learning_rate": 8.911884744081112e-09,
|
| 113185 |
+
"loss": 0.5426980257034302,
|
| 113186 |
+
"step": 16168
|
| 113187 |
+
},
|
| 113188 |
+
{
|
| 113189 |
+
"epoch": 19.839263803680982,
|
| 113190 |
+
"grad_norm": 0.25851765275001526,
|
| 113191 |
+
"learning_rate": 8.778383062943429e-09,
|
| 113192 |
+
"loss": 0.38326501846313477,
|
| 113193 |
+
"step": 16169
|
| 113194 |
+
},
|
| 113195 |
+
{
|
| 113196 |
+
"epoch": 19.84049079754601,
|
| 113197 |
+
"grad_norm": 0.2651871144771576,
|
| 113198 |
+
"learning_rate": 8.645888706670935e-09,
|
| 113199 |
+
"loss": 0.5296850800514221,
|
| 113200 |
+
"step": 16170
|
| 113201 |
+
},
|
| 113202 |
+
{
|
| 113203 |
+
"epoch": 19.841717791411043,
|
| 113204 |
+
"grad_norm": 0.2683587670326233,
|
| 113205 |
+
"learning_rate": 8.514401680606577e-09,
|
| 113206 |
+
"loss": 0.6749495267868042,
|
| 113207 |
+
"step": 16171
|
| 113208 |
+
},
|
| 113209 |
+
{
|
| 113210 |
+
"epoch": 19.842944785276075,
|
| 113211 |
+
"grad_norm": 0.29069486260414124,
|
| 113212 |
+
"learning_rate": 8.383921990048892e-09,
|
| 113213 |
+
"loss": 0.7353404760360718,
|
| 113214 |
+
"step": 16172
|
| 113215 |
+
},
|
| 113216 |
+
{
|
| 113217 |
+
"epoch": 19.844171779141103,
|
| 113218 |
+
"grad_norm": 0.2755039930343628,
|
| 113219 |
+
"learning_rate": 8.254449640257567e-09,
|
| 113220 |
+
"loss": 0.6960476040840149,
|
| 113221 |
+
"step": 16173
|
| 113222 |
+
},
|
| 113223 |
+
{
|
| 113224 |
+
"epoch": 19.845398773006135,
|
| 113225 |
+
"grad_norm": 0.26910510659217834,
|
| 113226 |
+
"learning_rate": 8.12598463644787e-09,
|
| 113227 |
+
"loss": 0.571635365486145,
|
| 113228 |
+
"step": 16174
|
| 113229 |
+
},
|
| 113230 |
+
{
|
| 113231 |
+
"epoch": 19.846625766871167,
|
| 113232 |
+
"grad_norm": 0.2472212314605713,
|
| 113233 |
+
"learning_rate": 7.998526983801768e-09,
|
| 113234 |
+
"loss": 0.4922823905944824,
|
| 113235 |
+
"step": 16175
|
| 113236 |
+
},
|
| 113237 |
+
{
|
| 113238 |
+
"epoch": 19.847852760736195,
|
| 113239 |
+
"grad_norm": 0.2671893537044525,
|
| 113240 |
+
"learning_rate": 7.872076687456819e-09,
|
| 113241 |
+
"loss": 0.633426308631897,
|
| 113242 |
+
"step": 16176
|
| 113243 |
+
},
|
| 113244 |
+
{
|
| 113245 |
+
"epoch": 19.849079754601227,
|
| 113246 |
+
"grad_norm": 0.2767641842365265,
|
| 113247 |
+
"learning_rate": 7.746633752508948e-09,
|
| 113248 |
+
"loss": 0.6906798481941223,
|
| 113249 |
+
"step": 16177
|
| 113250 |
+
},
|
| 113251 |
+
{
|
| 113252 |
+
"epoch": 19.85030674846626,
|
| 113253 |
+
"grad_norm": 0.24988150596618652,
|
| 113254 |
+
"learning_rate": 7.62219818401244e-09,
|
| 113255 |
+
"loss": 0.46294450759887695,
|
| 113256 |
+
"step": 16178
|
| 113257 |
+
},
|
| 113258 |
+
{
|
| 113259 |
+
"epoch": 19.851533742331288,
|
| 113260 |
+
"grad_norm": 0.25956326723098755,
|
| 113261 |
+
"learning_rate": 7.49876998698551e-09,
|
| 113262 |
+
"loss": 0.5465369820594788,
|
| 113263 |
+
"step": 16179
|
| 113264 |
+
},
|
| 113265 |
+
{
|
| 113266 |
+
"epoch": 19.85276073619632,
|
| 113267 |
+
"grad_norm": 0.29296278953552246,
|
| 113268 |
+
"learning_rate": 7.376349166401952e-09,
|
| 113269 |
+
"loss": 0.5256083011627197,
|
| 113270 |
+
"step": 16180
|
| 113271 |
+
},
|
| 113272 |
+
{
|
| 113273 |
+
"epoch": 19.85398773006135,
|
| 113274 |
+
"grad_norm": 0.2679578363895416,
|
| 113275 |
+
"learning_rate": 7.25493572719671e-09,
|
| 113276 |
+
"loss": 0.7635791301727295,
|
| 113277 |
+
"step": 16181
|
| 113278 |
+
},
|
| 113279 |
+
{
|
| 113280 |
+
"epoch": 19.85521472392638,
|
| 113281 |
+
"grad_norm": 0.2297963947057724,
|
| 113282 |
+
"learning_rate": 7.1345296742630905e-09,
|
| 113283 |
+
"loss": 0.5203582644462585,
|
| 113284 |
+
"step": 16182
|
| 113285 |
+
},
|
| 113286 |
+
{
|
| 113287 |
+
"epoch": 19.856441717791412,
|
| 113288 |
+
"grad_norm": 0.2883889973163605,
|
| 113289 |
+
"learning_rate": 7.015131012455545e-09,
|
| 113290 |
+
"loss": 0.7470743060112,
|
| 113291 |
+
"step": 16183
|
| 113292 |
+
},
|
| 113293 |
+
{
|
| 113294 |
+
"epoch": 19.85766871165644,
|
| 113295 |
+
"grad_norm": 0.27914828062057495,
|
| 113296 |
+
"learning_rate": 6.8967397465868884e-09,
|
| 113297 |
+
"loss": 0.818249523639679,
|
| 113298 |
+
"step": 16184
|
| 113299 |
+
},
|
| 113300 |
+
{
|
| 113301 |
+
"epoch": 19.858895705521473,
|
| 113302 |
+
"grad_norm": 0.24401293694972992,
|
| 113303 |
+
"learning_rate": 6.779355881425531e-09,
|
| 113304 |
+
"loss": 0.4289743900299072,
|
| 113305 |
+
"step": 16185
|
| 113306 |
+
},
|
| 113307 |
+
{
|
| 113308 |
+
"epoch": 19.860122699386505,
|
| 113309 |
+
"grad_norm": 0.3110334575176239,
|
| 113310 |
+
"learning_rate": 6.6629794217065724e-09,
|
| 113311 |
+
"loss": 0.6972857117652893,
|
| 113312 |
+
"step": 16186
|
| 113313 |
+
},
|
| 113314 |
+
{
|
| 113315 |
+
"epoch": 19.861349693251533,
|
| 113316 |
+
"grad_norm": 0.2281228005886078,
|
| 113317 |
+
"learning_rate": 6.547610372120705e-09,
|
| 113318 |
+
"loss": 0.43519943952560425,
|
| 113319 |
+
"step": 16187
|
| 113320 |
+
},
|
| 113321 |
+
{
|
| 113322 |
+
"epoch": 19.862576687116565,
|
| 113323 |
+
"grad_norm": 0.24871395528316498,
|
| 113324 |
+
"learning_rate": 6.433248737316988e-09,
|
| 113325 |
+
"loss": 0.5509378910064697,
|
| 113326 |
+
"step": 16188
|
| 113327 |
+
},
|
| 113328 |
+
{
|
| 113329 |
+
"epoch": 19.863803680981594,
|
| 113330 |
+
"grad_norm": 0.26733362674713135,
|
| 113331 |
+
"learning_rate": 6.319894521902847e-09,
|
| 113332 |
+
"loss": 0.5738027691841125,
|
| 113333 |
+
"step": 16189
|
| 113334 |
+
},
|
| 113335 |
+
{
|
| 113336 |
+
"epoch": 19.865030674846626,
|
| 113337 |
+
"grad_norm": 0.3002522885799408,
|
| 113338 |
+
"learning_rate": 6.207547730452401e-09,
|
| 113339 |
+
"loss": 0.3993024528026581,
|
| 113340 |
+
"step": 16190
|
| 113341 |
+
},
|
| 113342 |
+
{
|
| 113343 |
+
"epoch": 19.866257668711658,
|
| 113344 |
+
"grad_norm": 0.3009348511695862,
|
| 113345 |
+
"learning_rate": 6.096208367489808e-09,
|
| 113346 |
+
"loss": 0.7532112002372742,
|
| 113347 |
+
"step": 16191
|
| 113348 |
+
},
|
| 113349 |
+
{
|
| 113350 |
+
"epoch": 19.867484662576686,
|
| 113351 |
+
"grad_norm": 0.25022900104522705,
|
| 113352 |
+
"learning_rate": 5.9858764375031465e-09,
|
| 113353 |
+
"loss": 0.6427910327911377,
|
| 113354 |
+
"step": 16192
|
| 113355 |
+
},
|
| 113356 |
+
{
|
| 113357 |
+
"epoch": 19.868711656441718,
|
| 113358 |
+
"grad_norm": 0.2702728807926178,
|
| 113359 |
+
"learning_rate": 5.876551944941633e-09,
|
| 113360 |
+
"loss": 0.5427796840667725,
|
| 113361 |
+
"step": 16193
|
| 113362 |
+
},
|
| 113363 |
+
{
|
| 113364 |
+
"epoch": 19.86993865030675,
|
| 113365 |
+
"grad_norm": 0.25946158170700073,
|
| 113366 |
+
"learning_rate": 5.7682348942100785e-09,
|
| 113367 |
+
"loss": 0.6330570578575134,
|
| 113368 |
+
"step": 16194
|
| 113369 |
+
},
|
| 113370 |
+
{
|
| 113371 |
+
"epoch": 19.87116564417178,
|
| 113372 |
+
"grad_norm": 0.24378158152103424,
|
| 113373 |
+
"learning_rate": 5.66092528967721e-09,
|
| 113374 |
+
"loss": 0.49621766805648804,
|
| 113375 |
+
"step": 16195
|
| 113376 |
+
},
|
| 113377 |
+
{
|
| 113378 |
+
"epoch": 19.87239263803681,
|
| 113379 |
+
"grad_norm": 0.249457448720932,
|
| 113380 |
+
"learning_rate": 5.554623135664572e-09,
|
| 113381 |
+
"loss": 0.4742406904697418,
|
| 113382 |
+
"step": 16196
|
| 113383 |
+
},
|
| 113384 |
+
{
|
| 113385 |
+
"epoch": 19.87361963190184,
|
| 113386 |
+
"grad_norm": 0.22787180542945862,
|
| 113387 |
+
"learning_rate": 5.449328436460399e-09,
|
| 113388 |
+
"loss": 0.5120702981948853,
|
| 113389 |
+
"step": 16197
|
| 113390 |
+
},
|
| 113391 |
+
{
|
| 113392 |
+
"epoch": 19.87484662576687,
|
| 113393 |
+
"grad_norm": 0.3254927694797516,
|
| 113394 |
+
"learning_rate": 5.345041196305744e-09,
|
| 113395 |
+
"loss": 0.7485535144805908,
|
| 113396 |
+
"step": 16198
|
| 113397 |
+
},
|
| 113398 |
+
{
|
| 113399 |
+
"epoch": 19.876073619631903,
|
| 113400 |
+
"grad_norm": 0.2474086582660675,
|
| 113401 |
+
"learning_rate": 5.241761419405578e-09,
|
| 113402 |
+
"loss": 0.5528963208198547,
|
| 113403 |
+
"step": 16199
|
| 113404 |
+
},
|
| 113405 |
+
{
|
| 113406 |
+
"epoch": 19.87730061349693,
|
| 113407 |
+
"grad_norm": 0.31348666548728943,
|
| 113408 |
+
"learning_rate": 5.139489109923234e-09,
|
| 113409 |
+
"loss": 0.7659933567047119,
|
| 113410 |
+
"step": 16200
|
| 113411 |
}
|
| 113412 |
],
|
| 113413 |
"logging_steps": 1,
|
|
|
|
| 113427 |
"attributes": {}
|
| 113428 |
}
|
| 113429 |
},
|
| 113430 |
+
"total_flos": 4.531283788994445e+19,
|
| 113431 |
"train_batch_size": 8,
|
| 113432 |
"trial_name": null,
|
| 113433 |
"trial_params": null
|