Training in progress, step 16500, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:58f878363523a0e64e4a73e22211dad474ecc97a22a53538c20d5a02719fcf7c
 size 340808816
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7c9e70331e79d87f2cfcf1035ca8704b2cd13f5ec3d84b4399417ff4497eef50
 size 173247691
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2ab462a1b3abfbdf4730d2effc939df974ea556065ac10db2bb1821235ee90fd
 size 1465
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
 "best_global_step": null,
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 18.
 "eval_steps": 500,
-"global_step":
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -113408,6 +113408,2106 @@
 "learning_rate": 7.999001546436152e-07,
 "loss": 0.6776,
 "step": 16200
 }
 ],
 "logging_steps": 1,
@@ -113427,7 +115527,7 @@
 "attributes": {}
 }
 },
-"total_flos": 9.
 "train_batch_size": 8,
 "trial_name": null,
 "trial_params": null

 "best_global_step": null,
 "best_metric": null,
 "best_model_checkpoint": null,
+"epoch": 18.793162393162394,
 "eval_steps": 500,
+"global_step": 16500,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
 "learning_rate": 7.999001546436152e-07,
 "loss": 0.6776,
 "step": 16200
| 113411 |
+
},
|
| 113412 |
+
{
|
| 113413 |
+
"epoch": 18.452421652421652,
|
| 113414 |
+
"grad_norm": 0.20273059606552124,
|
| 113415 |
+
"learning_rate": 7.987314242875965e-07,
|
| 113416 |
+
"loss": 0.4911,
|
| 113417 |
+
"step": 16201
|
| 113418 |
+
},
|
| 113419 |
+
{
|
| 113420 |
+
"epoch": 18.453561253561254,
|
| 113421 |
+
"grad_norm": 0.18550534546375275,
|
| 113422 |
+
"learning_rate": 7.975635345081917e-07,
|
| 113423 |
+
"loss": 0.706,
|
| 113424 |
+
"step": 16202
|
| 113425 |
+
},
|
| 113426 |
+
{
|
| 113427 |
+
"epoch": 18.454700854700853,
|
| 113428 |
+
"grad_norm": 0.20157377421855927,
|
| 113429 |
+
"learning_rate": 7.963964853459626e-07,
|
| 113430 |
+
"loss": 0.7658,
|
| 113431 |
+
"step": 16203
|
| 113432 |
+
},
|
| 113433 |
+
{
|
| 113434 |
+
"epoch": 18.455840455840455,
|
| 113435 |
+
"grad_norm": 0.1587774008512497,
|
| 113436 |
+
"learning_rate": 7.952302768414466e-07,
|
| 113437 |
+
"loss": 0.6919,
|
| 113438 |
+
"step": 16204
|
| 113439 |
+
},
|
| 113440 |
+
{
|
| 113441 |
+
"epoch": 18.456980056980058,
|
| 113442 |
+
"grad_norm": 0.20730525255203247,
|
| 113443 |
+
"learning_rate": 7.940649090351415e-07,
|
| 113444 |
+
"loss": 0.6302,
|
| 113445 |
+
"step": 16205
|
| 113446 |
+
},
|
| 113447 |
+
{
|
| 113448 |
+
"epoch": 18.458119658119656,
|
| 113449 |
+
"grad_norm": 0.22193406522274017,
|
| 113450 |
+
"learning_rate": 7.929003819675291e-07,
|
| 113451 |
+
"loss": 0.6042,
|
| 113452 |
+
"step": 16206
|
| 113453 |
+
},
|
| 113454 |
+
{
|
| 113455 |
+
"epoch": 18.45925925925926,
|
| 113456 |
+
"grad_norm": 0.18707987666130066,
|
| 113457 |
+
"learning_rate": 7.917366956790573e-07,
|
| 113458 |
+
"loss": 0.6165,
|
| 113459 |
+
"step": 16207
|
| 113460 |
+
},
|
| 113461 |
+
{
|
| 113462 |
+
"epoch": 18.46039886039886,
|
| 113463 |
+
"grad_norm": 0.20526203513145447,
|
| 113464 |
+
"learning_rate": 7.90573850210144e-07,
|
| 113465 |
+
"loss": 0.4681,
|
| 113466 |
+
"step": 16208
|
| 113467 |
+
},
|
| 113468 |
+
{
|
| 113469 |
+
"epoch": 18.46153846153846,
|
| 113470 |
+
"grad_norm": 0.16481465101242065,
|
| 113471 |
+
"learning_rate": 7.894118456011762e-07,
|
| 113472 |
+
"loss": 0.6136,
|
| 113473 |
+
"step": 16209
|
| 113474 |
+
},
|
| 113475 |
+
{
|
| 113476 |
+
"epoch": 18.462678062678062,
|
| 113477 |
+
"grad_norm": 0.1793680191040039,
|
| 113478 |
+
"learning_rate": 7.882506818925134e-07,
|
| 113479 |
+
"loss": 0.7111,
|
| 113480 |
+
"step": 16210
|
| 113481 |
+
},
|
| 113482 |
+
{
|
| 113483 |
+
"epoch": 18.463817663817665,
|
| 113484 |
+
"grad_norm": 0.21176199615001678,
|
| 113485 |
+
"learning_rate": 7.870903591244899e-07,
|
| 113486 |
+
"loss": 0.5728,
|
| 113487 |
+
"step": 16211
|
| 113488 |
+
},
|
| 113489 |
+
{
|
| 113490 |
+
"epoch": 18.464957264957263,
|
| 113491 |
+
"grad_norm": 0.2248607575893402,
|
| 113492 |
+
"learning_rate": 7.859308773374013e-07,
|
| 113493 |
+
"loss": 0.4769,
|
| 113494 |
+
"step": 16212
|
| 113495 |
+
},
|
| 113496 |
+
{
|
| 113497 |
+
"epoch": 18.466096866096866,
|
| 113498 |
+
"grad_norm": 0.19010235369205475,
|
| 113499 |
+
"learning_rate": 7.847722365715238e-07,
|
| 113500 |
+
"loss": 0.8921,
|
| 113501 |
+
"step": 16213
|
| 113502 |
+
},
|
| 113503 |
+
{
|
| 113504 |
+
"epoch": 18.467236467236468,
|
| 113505 |
+
"grad_norm": 0.25821489095687866,
|
| 113506 |
+
"learning_rate": 7.836144368670972e-07,
|
| 113507 |
+
"loss": 0.4498,
|
| 113508 |
+
"step": 16214
|
| 113509 |
+
},
|
| 113510 |
+
{
|
| 113511 |
+
"epoch": 18.468376068376067,
|
| 113512 |
+
"grad_norm": 0.24605529010295868,
|
| 113513 |
+
"learning_rate": 7.824574782643395e-07,
|
| 113514 |
+
"loss": 0.5791,
|
| 113515 |
+
"step": 16215
|
| 113516 |
+
},
|
| 113517 |
+
{
|
| 113518 |
+
"epoch": 18.46951566951567,
|
| 113519 |
+
"grad_norm": 0.17802326381206512,
|
| 113520 |
+
"learning_rate": 7.813013608034297e-07,
|
| 113521 |
+
"loss": 0.7837,
|
| 113522 |
+
"step": 16216
|
| 113523 |
+
},
|
| 113524 |
+
{
|
| 113525 |
+
"epoch": 18.47065527065527,
|
| 113526 |
+
"grad_norm": 0.1973274201154709,
|
| 113527 |
+
"learning_rate": 7.801460845245273e-07,
|
| 113528 |
+
"loss": 0.7205,
|
| 113529 |
+
"step": 16217
|
| 113530 |
+
},
|
| 113531 |
+
{
|
| 113532 |
+
"epoch": 18.47179487179487,
|
| 113533 |
+
"grad_norm": 0.21337224543094635,
|
| 113534 |
+
"learning_rate": 7.789916494677529e-07,
|
| 113535 |
+
"loss": 0.4841,
|
| 113536 |
+
"step": 16218
|
| 113537 |
+
},
|
| 113538 |
+
{
|
| 113539 |
+
"epoch": 18.472934472934472,
|
| 113540 |
+
"grad_norm": 0.18827608227729797,
|
| 113541 |
+
"learning_rate": 7.778380556732079e-07,
|
| 113542 |
+
"loss": 0.75,
|
| 113543 |
+
"step": 16219
|
| 113544 |
+
},
|
| 113545 |
+
{
|
| 113546 |
+
"epoch": 18.474074074074075,
|
| 113547 |
+
"grad_norm": 0.19029515981674194,
|
| 113548 |
+
"learning_rate": 7.766853031809573e-07,
|
| 113549 |
+
"loss": 0.5274,
|
| 113550 |
+
"step": 16220
|
| 113551 |
+
},
|
| 113552 |
+
{
|
| 113553 |
+
"epoch": 18.475213675213674,
|
| 113554 |
+
"grad_norm": 0.1854611337184906,
|
| 113555 |
+
"learning_rate": 7.755333920310415e-07,
|
| 113556 |
+
"loss": 0.6066,
|
| 113557 |
+
"step": 16221
|
| 113558 |
+
},
|
| 113559 |
+
{
|
| 113560 |
+
"epoch": 18.476353276353276,
|
| 113561 |
+
"grad_norm": 0.20245634019374847,
|
| 113562 |
+
"learning_rate": 7.743823222634728e-07,
|
| 113563 |
+
"loss": 0.7071,
|
| 113564 |
+
"step": 16222
|
| 113565 |
+
},
|
| 113566 |
+
{
|
| 113567 |
+
"epoch": 18.477492877492878,
|
| 113568 |
+
"grad_norm": 0.23420220613479614,
|
| 113569 |
+
"learning_rate": 7.73232093918222e-07,
|
| 113570 |
+
"loss": 0.6315,
|
| 113571 |
+
"step": 16223
|
| 113572 |
+
},
|
| 113573 |
+
{
|
| 113574 |
+
"epoch": 18.478632478632477,
|
| 113575 |
+
"grad_norm": 0.17568355798721313,
|
| 113576 |
+
"learning_rate": 7.720827070352432e-07,
|
| 113577 |
+
"loss": 0.6573,
|
| 113578 |
+
"step": 16224
|
| 113579 |
+
},
|
| 113580 |
+
{
|
| 113581 |
+
"epoch": 18.47977207977208,
|
| 113582 |
+
"grad_norm": 0.21530553698539734,
|
| 113583 |
+
"learning_rate": 7.70934161654463e-07,
|
| 113584 |
+
"loss": 0.5061,
|
| 113585 |
+
"step": 16225
|
| 113586 |
+
},
|
| 113587 |
+
{
|
| 113588 |
+
"epoch": 18.48091168091168,
|
| 113589 |
+
"grad_norm": 0.21519404649734497,
|
| 113590 |
+
"learning_rate": 7.697864578157688e-07,
|
| 113591 |
+
"loss": 0.5939,
|
| 113592 |
+
"step": 16226
|
| 113593 |
+
},
|
| 113594 |
+
{
|
| 113595 |
+
"epoch": 18.48205128205128,
|
| 113596 |
+
"grad_norm": 0.22826364636421204,
|
| 113597 |
+
"learning_rate": 7.686395955590231e-07,
|
| 113598 |
+
"loss": 0.5632,
|
| 113599 |
+
"step": 16227
|
| 113600 |
+
},
|
| 113601 |
+
{
|
| 113602 |
+
"epoch": 18.483190883190883,
|
| 113603 |
+
"grad_norm": 0.19922730326652527,
|
| 113604 |
+
"learning_rate": 7.674935749240608e-07,
|
| 113605 |
+
"loss": 0.6566,
|
| 113606 |
+
"step": 16228
|
| 113607 |
+
},
|
| 113608 |
+
{
|
| 113609 |
+
"epoch": 18.484330484330485,
|
| 113610 |
+
"grad_norm": 0.216302752494812,
|
| 113611 |
+
"learning_rate": 7.663483959506861e-07,
|
| 113612 |
+
"loss": 0.6584,
|
| 113613 |
+
"step": 16229
|
| 113614 |
+
},
|
| 113615 |
+
{
|
| 113616 |
+
"epoch": 18.485470085470084,
|
| 113617 |
+
"grad_norm": 0.19709351658821106,
|
| 113618 |
+
"learning_rate": 7.65204058678673e-07,
|
| 113619 |
+
"loss": 0.6128,
|
| 113620 |
+
"step": 16230
|
| 113621 |
+
},
|
| 113622 |
+
{
|
| 113623 |
+
"epoch": 18.486609686609686,
|
| 113624 |
+
"grad_norm": 0.18579338490962982,
|
| 113625 |
+
"learning_rate": 7.640605631477699e-07,
|
| 113626 |
+
"loss": 0.9303,
|
| 113627 |
+
"step": 16231
|
| 113628 |
+
},
|
| 113629 |
+
{
|
| 113630 |
+
"epoch": 18.48774928774929,
|
| 113631 |
+
"grad_norm": 0.22104182839393616,
|
| 113632 |
+
"learning_rate": 7.629179093976923e-07,
|
| 113633 |
+
"loss": 0.6292,
|
| 113634 |
+
"step": 16232
|
| 113635 |
+
},
|
| 113636 |
+
{
|
| 113637 |
+
"epoch": 18.488888888888887,
|
| 113638 |
+
"grad_norm": 0.18530291318893433,
|
| 113639 |
+
"learning_rate": 7.617760974681282e-07,
|
| 113640 |
+
"loss": 0.4868,
|
| 113641 |
+
"step": 16233
|
| 113642 |
+
},
|
| 113643 |
+
{
|
| 113644 |
+
"epoch": 18.49002849002849,
|
| 113645 |
+
"grad_norm": 0.20386242866516113,
|
| 113646 |
+
"learning_rate": 7.6063512739874e-07,
|
| 113647 |
+
"loss": 0.5239,
|
| 113648 |
+
"step": 16234
|
| 113649 |
+
},
|
| 113650 |
+
{
|
| 113651 |
+
"epoch": 18.491168091168092,
|
| 113652 |
+
"grad_norm": 0.1997651904821396,
|
| 113653 |
+
"learning_rate": 7.594949992291489e-07,
|
| 113654 |
+
"loss": 0.7122,
|
| 113655 |
+
"step": 16235
|
| 113656 |
+
},
|
| 113657 |
+
{
|
| 113658 |
+
"epoch": 18.49230769230769,
|
| 113659 |
+
"grad_norm": 0.19039437174797058,
|
| 113660 |
+
"learning_rate": 7.583557129989565e-07,
|
| 113661 |
+
"loss": 0.6687,
|
| 113662 |
+
"step": 16236
|
| 113663 |
+
},
|
| 113664 |
+
{
|
| 113665 |
+
"epoch": 18.493447293447293,
|
| 113666 |
+
"grad_norm": 0.15081074833869934,
|
| 113667 |
+
"learning_rate": 7.572172687477341e-07,
|
| 113668 |
+
"loss": 0.5981,
|
| 113669 |
+
"step": 16237
|
| 113670 |
+
},
|
| 113671 |
+
{
|
| 113672 |
+
"epoch": 18.494586894586895,
|
| 113673 |
+
"grad_norm": 0.23857636749744415,
|
| 113674 |
+
"learning_rate": 7.560796665150305e-07,
|
| 113675 |
+
"loss": 0.5274,
|
| 113676 |
+
"step": 16238
|
| 113677 |
+
},
|
| 113678 |
+
{
|
| 113679 |
+
"epoch": 18.495726495726494,
|
| 113680 |
+
"grad_norm": 0.2259291708469391,
|
| 113681 |
+
"learning_rate": 7.549429063403446e-07,
|
| 113682 |
+
"loss": 0.5356,
|
| 113683 |
+
"step": 16239
|
| 113684 |
+
},
|
| 113685 |
+
{
|
| 113686 |
+
"epoch": 18.496866096866096,
|
| 113687 |
+
"grad_norm": 0.22006577253341675,
|
| 113688 |
+
"learning_rate": 7.538069882631671e-07,
|
| 113689 |
+
"loss": 0.6589,
|
| 113690 |
+
"step": 16240
|
| 113691 |
+
},
|
| 113692 |
+
{
|
| 113693 |
+
"epoch": 18.4980056980057,
|
| 113694 |
+
"grad_norm": 0.21424247324466705,
|
| 113695 |
+
"learning_rate": 7.526719123229526e-07,
|
| 113696 |
+
"loss": 0.7241,
|
| 113697 |
+
"step": 16241
|
| 113698 |
+
},
|
| 113699 |
+
{
|
| 113700 |
+
"epoch": 18.499145299145297,
|
| 113701 |
+
"grad_norm": 0.18898151814937592,
|
| 113702 |
+
"learning_rate": 7.515376785591194e-07,
|
| 113703 |
+
"loss": 0.5691,
|
| 113704 |
+
"step": 16242
|
| 113705 |
+
},
|
| 113706 |
+
{
|
| 113707 |
+
"epoch": 18.5002849002849,
|
| 113708 |
+
"grad_norm": 0.25201770663261414,
|
| 113709 |
+
"learning_rate": 7.504042870110667e-07,
|
| 113710 |
+
"loss": 0.4755,
|
| 113711 |
+
"step": 16243
|
| 113712 |
+
},
|
| 113713 |
+
{
|
| 113714 |
+
"epoch": 18.501424501424502,
|
| 113715 |
+
"grad_norm": 0.1744522601366043,
|
| 113716 |
+
"learning_rate": 7.492717377181602e-07,
|
| 113717 |
+
"loss": 0.5187,
|
| 113718 |
+
"step": 16244
|
| 113719 |
+
},
|
| 113720 |
+
{
|
| 113721 |
+
"epoch": 18.5025641025641,
|
| 113722 |
+
"grad_norm": 0.21565017104148865,
|
| 113723 |
+
"learning_rate": 7.481400307197405e-07,
|
| 113724 |
+
"loss": 0.4777,
|
| 113725 |
+
"step": 16245
|
| 113726 |
+
},
|
| 113727 |
+
{
|
| 113728 |
+
"epoch": 18.503703703703703,
|
| 113729 |
+
"grad_norm": 0.15349186956882477,
|
| 113730 |
+
"learning_rate": 7.47009166055107e-07,
|
| 113731 |
+
"loss": 0.8353,
|
| 113732 |
+
"step": 16246
|
| 113733 |
+
},
|
| 113734 |
+
{
|
| 113735 |
+
"epoch": 18.504843304843305,
|
| 113736 |
+
"grad_norm": 0.16424761712551117,
|
| 113737 |
+
"learning_rate": 7.458791437635393e-07,
|
| 113738 |
+
"loss": 0.8842,
|
| 113739 |
+
"step": 16247
|
| 113740 |
+
},
|
| 113741 |
+
{
|
| 113742 |
+
"epoch": 18.505982905982904,
|
| 113743 |
+
"grad_norm": 0.16871342062950134,
|
| 113744 |
+
"learning_rate": 7.447499638842892e-07,
|
| 113745 |
+
"loss": 0.6337,
|
| 113746 |
+
"step": 16248
|
| 113747 |
+
},
|
| 113748 |
+
{
|
| 113749 |
+
"epoch": 18.507122507122507,
|
| 113750 |
+
"grad_norm": 0.19683855772018433,
|
| 113751 |
+
"learning_rate": 7.436216264565781e-07,
|
| 113752 |
+
"loss": 0.7552,
|
| 113753 |
+
"step": 16249
|
| 113754 |
+
},
|
| 113755 |
+
{
|
| 113756 |
+
"epoch": 18.50826210826211,
|
| 113757 |
+
"grad_norm": 0.2150336056947708,
|
| 113758 |
+
"learning_rate": 7.424941315195888e-07,
|
| 113759 |
+
"loss": 0.6509,
|
| 113760 |
+
"step": 16250
|
| 113761 |
+
},
|
| 113762 |
+
{
|
| 113763 |
+
"epoch": 18.509401709401708,
|
| 113764 |
+
"grad_norm": 0.22778502106666565,
|
| 113765 |
+
"learning_rate": 7.413674791124897e-07,
|
| 113766 |
+
"loss": 0.5306,
|
| 113767 |
+
"step": 16251
|
| 113768 |
+
},
|
| 113769 |
+
{
|
| 113770 |
+
"epoch": 18.51054131054131,
|
| 113771 |
+
"grad_norm": 0.18186631798744202,
|
| 113772 |
+
"learning_rate": 7.40241669274408e-07,
|
| 113773 |
+
"loss": 0.6358,
|
| 113774 |
+
"step": 16252
|
| 113775 |
+
},
|
| 113776 |
+
{
|
| 113777 |
+
"epoch": 18.511680911680912,
|
| 113778 |
+
"grad_norm": 0.22945746779441833,
|
| 113779 |
+
"learning_rate": 7.391167020444483e-07,
|
| 113780 |
+
"loss": 0.632,
|
| 113781 |
+
"step": 16253
|
| 113782 |
+
},
|
| 113783 |
+
{
|
| 113784 |
+
"epoch": 18.51282051282051,
|
| 113785 |
+
"grad_norm": 0.20270879566669464,
|
| 113786 |
+
"learning_rate": 7.379925774616824e-07,
|
| 113787 |
+
"loss": 0.6971,
|
| 113788 |
+
"step": 16254
|
| 113789 |
+
},
|
| 113790 |
+
{
|
| 113791 |
+
"epoch": 18.513960113960113,
|
| 113792 |
+
"grad_norm": 0.17987211048603058,
|
| 113793 |
+
"learning_rate": 7.36869295565154e-07,
|
| 113794 |
+
"loss": 0.616,
|
| 113795 |
+
"step": 16255
|
| 113796 |
+
},
|
| 113797 |
+
{
|
| 113798 |
+
"epoch": 18.515099715099716,
|
| 113799 |
+
"grad_norm": 0.20830969512462616,
|
| 113800 |
+
"learning_rate": 7.357468563938819e-07,
|
| 113801 |
+
"loss": 0.4023,
|
| 113802 |
+
"step": 16256
|
| 113803 |
+
},
|
| 113804 |
+
{
|
| 113805 |
+
"epoch": 18.516239316239318,
|
| 113806 |
+
"grad_norm": 0.22534967958927155,
|
| 113807 |
+
"learning_rate": 7.346252599868486e-07,
|
| 113808 |
+
"loss": 0.6377,
|
| 113809 |
+
"step": 16257
|
| 113810 |
+
},
|
| 113811 |
+
{
|
| 113812 |
+
"epoch": 18.517378917378917,
|
| 113813 |
+
"grad_norm": 0.25643622875213623,
|
| 113814 |
+
"learning_rate": 7.335045063830065e-07,
|
| 113815 |
+
"loss": 0.5016,
|
| 113816 |
+
"step": 16258
|
| 113817 |
+
},
|
| 113818 |
+
{
|
| 113819 |
+
"epoch": 18.51851851851852,
|
| 113820 |
+
"grad_norm": 0.16182145476341248,
|
| 113821 |
+
"learning_rate": 7.323845956212883e-07,
|
| 113822 |
+
"loss": 0.6762,
|
| 113823 |
+
"step": 16259
|
| 113824 |
+
},
|
| 113825 |
+
{
|
| 113826 |
+
"epoch": 18.51965811965812,
|
| 113827 |
+
"grad_norm": 0.21061931550502777,
|
| 113828 |
+
"learning_rate": 7.312655277405905e-07,
|
| 113829 |
+
"loss": 0.6598,
|
| 113830 |
+
"step": 16260
|
| 113831 |
+
},
|
| 113832 |
+
{
|
| 113833 |
+
"epoch": 18.52079772079772,
|
| 113834 |
+
"grad_norm": 0.1625216007232666,
|
| 113835 |
+
"learning_rate": 7.301473027797794e-07,
|
| 113836 |
+
"loss": 0.7835,
|
| 113837 |
+
"step": 16261
|
| 113838 |
+
},
|
| 113839 |
+
{
|
| 113840 |
+
"epoch": 18.521937321937322,
|
| 113841 |
+
"grad_norm": 0.21150529384613037,
|
| 113842 |
+
"learning_rate": 7.290299207776935e-07,
|
| 113843 |
+
"loss": 0.5575,
|
| 113844 |
+
"step": 16262
|
| 113845 |
+
},
|
| 113846 |
+
{
|
| 113847 |
+
"epoch": 18.523076923076925,
|
| 113848 |
+
"grad_norm": 0.21286988258361816,
|
| 113849 |
+
"learning_rate": 7.279133817731432e-07,
|
| 113850 |
+
"loss": 0.4572,
|
| 113851 |
+
"step": 16263
|
| 113852 |
+
},
|
| 113853 |
+
{
|
| 113854 |
+
"epoch": 18.524216524216524,
|
| 113855 |
+
"grad_norm": 0.15413245558738708,
|
| 113856 |
+
"learning_rate": 7.267976858049114e-07,
|
| 113857 |
+
"loss": 0.7582,
|
| 113858 |
+
"step": 16264
|
| 113859 |
+
},
|
| 113860 |
+
{
|
| 113861 |
+
"epoch": 18.525356125356126,
|
| 113862 |
+
"grad_norm": 0.2521764636039734,
|
| 113863 |
+
"learning_rate": 7.256828329117449e-07,
|
| 113864 |
+
"loss": 0.5218,
|
| 113865 |
+
"step": 16265
|
| 113866 |
+
},
|
| 113867 |
+
{
|
| 113868 |
+
"epoch": 18.526495726495728,
|
| 113869 |
+
"grad_norm": 0.20870208740234375,
|
| 113870 |
+
"learning_rate": 7.24568823132371e-07,
|
| 113871 |
+
"loss": 0.6468,
|
| 113872 |
+
"step": 16266
|
| 113873 |
+
},
|
| 113874 |
+
{
|
| 113875 |
+
"epoch": 18.527635327635327,
|
| 113876 |
+
"grad_norm": 0.19469735026359558,
|
| 113877 |
+
"learning_rate": 7.234556565054812e-07,
|
| 113878 |
+
"loss": 0.6161,
|
| 113879 |
+
"step": 16267
|
| 113880 |
+
},
|
| 113881 |
+
{
|
| 113882 |
+
"epoch": 18.52877492877493,
|
| 113883 |
+
"grad_norm": 0.18879103660583496,
|
| 113884 |
+
"learning_rate": 7.22343333069736e-07,
|
| 113885 |
+
"loss": 0.7047,
|
| 113886 |
+
"step": 16268
|
| 113887 |
+
},
|
| 113888 |
+
{
|
| 113889 |
+
"epoch": 18.52991452991453,
|
| 113890 |
+
"grad_norm": 0.19390064477920532,
|
| 113891 |
+
"learning_rate": 7.212318528637685e-07,
|
| 113892 |
+
"loss": 0.7774,
|
| 113893 |
+
"step": 16269
|
| 113894 |
+
},
|
| 113895 |
+
{
|
| 113896 |
+
"epoch": 18.53105413105413,
|
| 113897 |
+
"grad_norm": 0.18350745737552643,
|
| 113898 |
+
"learning_rate": 7.201212159261867e-07,
|
| 113899 |
+
"loss": 0.8099,
|
| 113900 |
+
"step": 16270
|
| 113901 |
+
},
|
| 113902 |
+
{
|
| 113903 |
+
"epoch": 18.532193732193733,
|
| 113904 |
+
"grad_norm": 0.22411833703517914,
|
| 113905 |
+
"learning_rate": 7.190114222955652e-07,
|
| 113906 |
+
"loss": 0.625,
|
| 113907 |
+
"step": 16271
|
| 113908 |
+
},
|
| 113909 |
+
{
|
| 113910 |
+
"epoch": 18.533333333333335,
|
| 113911 |
+
"grad_norm": 0.19694851338863373,
|
| 113912 |
+
"learning_rate": 7.17902472010451e-07,
|
| 113913 |
+
"loss": 0.4554,
|
| 113914 |
+
"step": 16272
|
| 113915 |
+
},
|
| 113916 |
+
{
|
| 113917 |
+
"epoch": 18.534472934472934,
|
| 113918 |
+
"grad_norm": 0.22289873659610748,
|
| 113919 |
+
"learning_rate": 7.167943651093578e-07,
|
| 113920 |
+
"loss": 0.6104,
|
| 113921 |
+
"step": 16273
|
| 113922 |
+
},
|
| 113923 |
+
{
|
| 113924 |
+
"epoch": 18.535612535612536,
|
| 113925 |
+
"grad_norm": 0.21234916150569916,
|
| 113926 |
+
"learning_rate": 7.156871016307771e-07,
|
| 113927 |
+
"loss": 0.7507,
|
| 113928 |
+
"step": 16274
|
| 113929 |
+
},
|
| 113930 |
+
{
|
| 113931 |
+
"epoch": 18.53675213675214,
|
| 113932 |
+
"grad_norm": 0.18940860033035278,
|
| 113933 |
+
"learning_rate": 7.145806816131639e-07,
|
| 113934 |
+
"loss": 0.588,
|
| 113935 |
+
"step": 16275
|
| 113936 |
+
},
|
| 113937 |
+
{
|
| 113938 |
+
"epoch": 18.537891737891737,
|
| 113939 |
+
"grad_norm": 0.16590355336666107,
|
| 113940 |
+
"learning_rate": 7.134751050949489e-07,
|
| 113941 |
+
"loss": 0.8098,
|
| 113942 |
+
"step": 16276
|
| 113943 |
+
},
|
| 113944 |
+
{
|
| 113945 |
+
"epoch": 18.53903133903134,
|
| 113946 |
+
"grad_norm": 0.2010948657989502,
|
| 113947 |
+
"learning_rate": 7.123703721145319e-07,
|
| 113948 |
+
"loss": 0.673,
|
| 113949 |
+
"step": 16277
|
| 113950 |
+
},
|
| 113951 |
+
{
|
| 113952 |
+
"epoch": 18.540170940170942,
|
| 113953 |
+
"grad_norm": 0.1987306922674179,
|
| 113954 |
+
"learning_rate": 7.112664827102822e-07,
|
| 113955 |
+
"loss": 0.566,
|
| 113956 |
+
"step": 16278
|
| 113957 |
+
},
|
| 113958 |
+
{
|
| 113959 |
+
"epoch": 18.54131054131054,
|
| 113960 |
+
"grad_norm": 0.16266214847564697,
|
| 113961 |
+
"learning_rate": 7.101634369205467e-07,
|
| 113962 |
+
"loss": 0.8276,
|
| 113963 |
+
"step": 16279
|
| 113964 |
+
},
|
| 113965 |
+
{
|
| 113966 |
+
"epoch": 18.542450142450143,
|
| 113967 |
+
"grad_norm": 0.21019327640533447,
|
| 113968 |
+
"learning_rate": 7.090612347836284e-07,
|
| 113969 |
+
"loss": 0.5039,
|
| 113970 |
+
"step": 16280
|
| 113971 |
+
},
|
| 113972 |
+
{
|
| 113973 |
+
"epoch": 18.543589743589745,
|
| 113974 |
+
"grad_norm": 0.21681322157382965,
|
| 113975 |
+
"learning_rate": 7.079598763378131e-07,
|
| 113976 |
+
"loss": 0.7382,
|
| 113977 |
+
"step": 16281
|
| 113978 |
+
},
|
| 113979 |
+
{
|
| 113980 |
+
"epoch": 18.544729344729344,
|
| 113981 |
+
"grad_norm": 0.15988457202911377,
|
| 113982 |
+
"learning_rate": 7.068593616213565e-07,
|
| 113983 |
+
"loss": 0.5709,
|
| 113984 |
+
"step": 16282
|
| 113985 |
+
},
|
| 113986 |
+
{
|
| 113987 |
+
"epoch": 18.545868945868946,
|
| 113988 |
+
"grad_norm": 0.1858881264925003,
|
| 113989 |
+
"learning_rate": 7.057596906724806e-07,
|
| 113990 |
+
"loss": 0.6184,
|
| 113991 |
+
"step": 16283
|
| 113992 |
+
},
|
| 113993 |
+
{
|
| 113994 |
+
"epoch": 18.54700854700855,
|
| 113995 |
+
"grad_norm": 0.18496987223625183,
|
| 113996 |
+
"learning_rate": 7.046608635293799e-07,
|
| 113997 |
+
"loss": 0.6723,
|
| 113998 |
+
"step": 16284
|
| 113999 |
+
},
|
| 114000 |
+
{
|
| 114001 |
+
"epoch": 18.548148148148147,
|
| 114002 |
+
"grad_norm": 0.2218230962753296,
|
| 114003 |
+
"learning_rate": 7.035628802302185e-07,
|
| 114004 |
+
"loss": 0.673,
|
| 114005 |
+
"step": 16285
|
| 114006 |
+
},
|
| 114007 |
+
{
|
| 114008 |
+
"epoch": 18.54928774928775,
|
| 114009 |
+
"grad_norm": 0.17258048057556152,
|
| 114010 |
+
"learning_rate": 7.024657408131352e-07,
|
| 114011 |
+
"loss": 0.7943,
|
| 114012 |
+
"step": 16286
|
| 114013 |
+
},
|
| 114014 |
+
{
|
| 114015 |
+
"epoch": 18.550427350427352,
|
| 114016 |
+
"grad_norm": 0.19041316211223602,
|
| 114017 |
+
"learning_rate": 7.013694453162329e-07,
|
| 114018 |
+
"loss": 0.4222,
|
| 114019 |
+
"step": 16287
|
| 114020 |
+
},
|
| 114021 |
+
{
|
| 114022 |
+
"epoch": 18.55156695156695,
|
| 114023 |
+
"grad_norm": 0.17228615283966064,
|
| 114024 |
+
"learning_rate": 7.002739937775949e-07,
|
| 114025 |
+
"loss": 0.6787,
|
| 114026 |
+
"step": 16288
|
| 114027 |
+
},
|
| 114028 |
+
{
|
| 114029 |
+
"epoch": 18.552706552706553,
|
| 114030 |
+
"grad_norm": 0.17214156687259674,
|
| 114031 |
+
"learning_rate": 6.991793862352631e-07,
|
| 114032 |
+
"loss": 0.5802,
|
| 114033 |
+
"step": 16289
|
| 114034 |
+
},
|
| 114035 |
+
{
|
| 114036 |
+
"epoch": 18.553846153846155,
|
| 114037 |
+
"grad_norm": 0.19898304343223572,
|
| 114038 |
+
"learning_rate": 6.980856227272597e-07,
|
| 114039 |
+
"loss": 0.7196,
|
| 114040 |
+
"step": 16290
|
| 114041 |
+
},
|
| 114042 |
+
{
|
| 114043 |
+
"epoch": 18.554985754985754,
|
| 114044 |
+
"grad_norm": 0.2032332420349121,
|
| 114045 |
+
"learning_rate": 6.969927032915741e-07,
|
| 114046 |
+
"loss": 0.654,
|
| 114047 |
+
"step": 16291
|
| 114048 |
+
},
|
| 114049 |
+
{
|
| 114050 |
+
"epoch": 18.556125356125357,
|
| 114051 |
+
"grad_norm": 0.2081405520439148,
|
| 114052 |
+
"learning_rate": 6.959006279661617e-07,
|
| 114053 |
+
"loss": 0.752,
|
| 114054 |
+
"step": 16292
|
| 114055 |
+
},
|
| 114056 |
+
{
|
| 114057 |
+
"epoch": 18.55726495726496,
|
| 114058 |
+
"grad_norm": 0.18995144963264465,
|
| 114059 |
+
"learning_rate": 6.948093967889591e-07,
|
| 114060 |
+
"loss": 0.5449,
|
| 114061 |
+
"step": 16293
|
| 114062 |
+
},
|
| 114063 |
+
{
|
| 114064 |
+
"epoch": 18.558404558404558,
|
| 114065 |
+
"grad_norm": 0.1944435089826584,
|
| 114066 |
+
"learning_rate": 6.937190097978691e-07,
|
| 114067 |
+
"loss": 0.424,
|
| 114068 |
+
"step": 16294
|
| 114069 |
+
},
|
| 114070 |
+
{
|
| 114071 |
+
"epoch": 18.55954415954416,
|
| 114072 |
+
"grad_norm": 0.20616506040096283,
|
| 114073 |
+
"learning_rate": 6.92629467030756e-07,
|
| 114074 |
+
"loss": 0.7207,
|
| 114075 |
+
"step": 16295
|
| 114076 |
+
},
|
| 114077 |
+
{
|
| 114078 |
+
"epoch": 18.560683760683762,
|
| 114079 |
+
"grad_norm": 0.1970258355140686,
|
| 114080 |
+
"learning_rate": 6.915407685254698e-07,
|
| 114081 |
+
"loss": 0.7153,
|
| 114082 |
+
"step": 16296
|
| 114083 |
+
},
|
| 114084 |
+
{
|
| 114085 |
+
"epoch": 18.56182336182336,
|
| 114086 |
+
"grad_norm": 0.17759563028812408,
|
| 114087 |
+
"learning_rate": 6.904529143198196e-07,
|
| 114088 |
+
"loss": 0.7204,
|
| 114089 |
+
"step": 16297
|
| 114090 |
+
},
|
| 114091 |
+
{
|
| 114092 |
+
"epoch": 18.562962962962963,
|
| 114093 |
+
"grad_norm": 0.17698125541210175,
|
| 114094 |
+
"learning_rate": 6.893659044515887e-07,
|
| 114095 |
+
"loss": 0.5877,
|
| 114096 |
+
"step": 16298
|
| 114097 |
+
},
|
| 114098 |
+
{
|
| 114099 |
+
"epoch": 18.564102564102566,
|
| 114100 |
+
"grad_norm": 0.19148777425289154,
|
| 114101 |
+
"learning_rate": 6.882797389585388e-07,
|
| 114102 |
+
"loss": 0.7251,
|
| 114103 |
+
"step": 16299
|
| 114104 |
+
},
|
| 114105 |
+
{
|
| 114106 |
+
"epoch": 18.565242165242164,
|
| 114107 |
+
"grad_norm": 0.17550741136074066,
|
| 114108 |
+
"learning_rate": 6.871944178783896e-07,
|
| 114109 |
+
"loss": 0.5844,
|
| 114110 |
+
"step": 16300
|
| 114111 |
+
},
|
| 114112 |
+
{
|
| 114113 |
+
"epoch": 18.566381766381767,
|
| 114114 |
+
"grad_norm": 0.19892731308937073,
|
| 114115 |
+
"learning_rate": 6.861099412488386e-07,
|
| 114116 |
+
"loss": 0.4684,
|
| 114117 |
+
"step": 16301
|
| 114118 |
+
},
|
| 114119 |
+
{
|
| 114120 |
+
"epoch": 18.56752136752137,
|
| 114121 |
+
"grad_norm": 0.23341961205005646,
|
| 114122 |
+
"learning_rate": 6.850263091075532e-07,
|
| 114123 |
+
"loss": 0.6921,
|
| 114124 |
+
"step": 16302
|
| 114125 |
+
},
|
| 114126 |
+
{
|
| 114127 |
+
"epoch": 18.568660968660968,
|
| 114128 |
+
"grad_norm": 0.21087487041950226,
|
| 114129 |
+
"learning_rate": 6.839435214921697e-07,
|
| 114130 |
+
"loss": 0.6139,
|
| 114131 |
+
"step": 16303
|
| 114132 |
+
},
|
| 114133 |
+
{
|
| 114134 |
+
"epoch": 18.56980056980057,
|
| 114135 |
+
"grad_norm": 0.19287163019180298,
|
| 114136 |
+
"learning_rate": 6.82861578440297e-07,
|
| 114137 |
+
"loss": 0.7285,
|
| 114138 |
+
"step": 16304
|
| 114139 |
+
},
|
| 114140 |
+
{
|
| 114141 |
+
"epoch": 18.570940170940172,
|
| 114142 |
+
"grad_norm": 0.19143036007881165,
|
| 114143 |
+
"learning_rate": 6.817804799895161e-07,
|
| 114144 |
+
"loss": 0.5606,
|
| 114145 |
+
"step": 16305
|
| 114146 |
+
},
|
| 114147 |
+
{
|
| 114148 |
+
"epoch": 18.57207977207977,
|
| 114149 |
+
"grad_norm": 0.19633238017559052,
|
| 114150 |
+
"learning_rate": 6.807002261773721e-07,
|
| 114151 |
+
"loss": 0.6803,
|
| 114152 |
+
"step": 16306
|
| 114153 |
+
},
|
| 114154 |
+
{
|
| 114155 |
+
"epoch": 18.573219373219374,
|
| 114156 |
+
"grad_norm": 0.18908600509166718,
|
| 114157 |
+
"learning_rate": 6.796208170413903e-07,
|
| 114158 |
+
"loss": 0.7697,
|
| 114159 |
+
"step": 16307
|
| 114160 |
+
},
|
| 114161 |
+
{
|
| 114162 |
+
"epoch": 18.574358974358976,
|
| 114163 |
+
"grad_norm": 0.20927876234054565,
|
| 114164 |
+
"learning_rate": 6.785422526190521e-07,
|
| 114165 |
+
"loss": 0.6892,
|
| 114166 |
+
"step": 16308
|
| 114167 |
+
},
|
| 114168 |
+
{
|
| 114169 |
+
"epoch": 18.575498575498575,
|
| 114170 |
+
"grad_norm": 0.17534089088439941,
|
| 114171 |
+
"learning_rate": 6.77464532947833e-07,
|
| 114172 |
+
"loss": 0.7656,
|
| 114173 |
+
"step": 16309
|
| 114174 |
+
},
|
| 114175 |
+
{
|
| 114176 |
+
"epoch": 18.576638176638177,
|
| 114177 |
+
"grad_norm": 0.18784134089946747,
|
| 114178 |
+
"learning_rate": 6.763876580651557e-07,
|
| 114179 |
+
"loss": 0.7512,
|
| 114180 |
+
"step": 16310
|
| 114181 |
+
},
|
| 114182 |
+
{
|
| 114183 |
+
"epoch": 18.57777777777778,
|
| 114184 |
+
"grad_norm": 0.1713857352733612,
|
| 114185 |
+
"learning_rate": 6.753116280084237e-07,
|
| 114186 |
+
"loss": 0.8112,
|
| 114187 |
+
"step": 16311
|
| 114188 |
+
},
|
| 114189 |
+
{
|
| 114190 |
+
"epoch": 18.578917378917378,
|
| 114191 |
+
"grad_norm": 0.2013809233903885,
|
| 114192 |
+
"learning_rate": 6.742364428150128e-07,
|
| 114193 |
+
"loss": 0.4333,
|
| 114194 |
+
"step": 16312
|
| 114195 |
+
},
|
| 114196 |
+
{
|
| 114197 |
+
"epoch": 18.58005698005698,
|
| 114198 |
+
"grad_norm": 0.21230103075504303,
|
| 114199 |
+
"learning_rate": 6.731621025222706e-07,
|
| 114200 |
+
"loss": 0.8469,
|
| 114201 |
+
"step": 16313
|
| 114202 |
+
},
|
| 114203 |
+
{
|
| 114204 |
+
"epoch": 18.581196581196583,
|
| 114205 |
+
"grad_norm": 0.17354519665241241,
|
| 114206 |
+
"learning_rate": 6.720886071675037e-07,
|
| 114207 |
+
"loss": 0.6986,
|
| 114208 |
+
"step": 16314
|
| 114209 |
+
},
|
| 114210 |
+
{
|
| 114211 |
+
"epoch": 18.58233618233618,
|
| 114212 |
+
"grad_norm": 0.2070615440607071,
|
| 114213 |
+
"learning_rate": 6.710159567880014e-07,
|
| 114214 |
+
"loss": 0.4701,
|
| 114215 |
+
"step": 16315
|
| 114216 |
+
},
|
| 114217 |
+
{
|
| 114218 |
+
"epoch": 18.583475783475784,
|
| 114219 |
+
"grad_norm": 0.22802412509918213,
|
| 114220 |
+
"learning_rate": 6.699441514210175e-07,
|
| 114221 |
+
"loss": 0.8033,
|
| 114222 |
+
"step": 16316
|
| 114223 |
+
},
|
| 114224 |
+
{
|
| 114225 |
+
"epoch": 18.584615384615386,
|
| 114226 |
+
"grad_norm": 0.19977618753910065,
|
| 114227 |
+
"learning_rate": 6.68873191103786e-07,
|
| 114228 |
+
"loss": 0.5289,
|
| 114229 |
+
"step": 16317
|
| 114230 |
+
},
|
| 114231 |
+
{
|
| 114232 |
+
"epoch": 18.585754985754985,
|
| 114233 |
+
"grad_norm": 0.18995045125484467,
|
| 114234 |
+
"learning_rate": 6.678030758734994e-07,
|
| 114235 |
+
"loss": 0.8933,
|
| 114236 |
+
"step": 16318
|
| 114237 |
+
},
|
| 114238 |
+
{
|
| 114239 |
+
"epoch": 18.586894586894587,
|
| 114240 |
+
"grad_norm": 0.20440229773521423,
|
| 114241 |
+
"learning_rate": 6.667338057673194e-07,
|
| 114242 |
+
"loss": 0.497,
|
| 114243 |
+
"step": 16319
|
| 114244 |
+
},
|
| 114245 |
+
{
|
| 114246 |
+
"epoch": 18.58803418803419,
|
| 114247 |
+
"grad_norm": 0.16072645783424377,
|
| 114248 |
+
"learning_rate": 6.656653808223972e-07,
|
| 114249 |
+
"loss": 0.5715,
|
| 114250 |
+
"step": 16320
|
| 114251 |
+
},
|
| 114252 |
+
{
|
| 114253 |
+
"epoch": 18.58917378917379,
|
| 114254 |
+
"grad_norm": 0.16689789295196533,
|
| 114255 |
+
"learning_rate": 6.645978010758336e-07,
|
| 114256 |
+
"loss": 0.7835,
|
| 114257 |
+
"step": 16321
|
| 114258 |
+
},
|
| 114259 |
+
{
|
| 114260 |
+
"epoch": 18.59031339031339,
|
| 114261 |
+
"grad_norm": 0.21596330404281616,
|
| 114262 |
+
"learning_rate": 6.635310665647099e-07,
|
| 114263 |
+
"loss": 0.7861,
|
| 114264 |
+
"step": 16322
|
| 114265 |
+
},
|
| 114266 |
+
{
|
| 114267 |
+
"epoch": 18.591452991452993,
|
| 114268 |
+
"grad_norm": 0.2213994413614273,
|
| 114269 |
+
"learning_rate": 6.624651773260798e-07,
|
| 114270 |
+
"loss": 0.7217,
|
| 114271 |
+
"step": 16323
|
| 114272 |
+
},
|
| 114273 |
+
{
|
| 114274 |
+
"epoch": 18.59259259259259,
|
| 114275 |
+
"grad_norm": 0.19551752507686615,
|
| 114276 |
+
"learning_rate": 6.614001333969638e-07,
|
| 114277 |
+
"loss": 0.6508,
|
| 114278 |
+
"step": 16324
|
| 114279 |
+
},
|
| 114280 |
+
{
|
| 114281 |
+
"epoch": 18.593732193732194,
|
| 114282 |
+
"grad_norm": 0.17777617275714874,
|
| 114283 |
+
"learning_rate": 6.603359348143517e-07,
|
| 114284 |
+
"loss": 0.5114,
|
| 114285 |
+
"step": 16325
|
| 114286 |
+
},
|
| 114287 |
+
{
|
| 114288 |
+
"epoch": 18.594871794871796,
|
| 114289 |
+
"grad_norm": 0.1685660481452942,
|
| 114290 |
+
"learning_rate": 6.592725816152057e-07,
|
| 114291 |
+
"loss": 0.8462,
|
| 114292 |
+
"step": 16326
|
| 114293 |
+
},
|
| 114294 |
+
{
|
| 114295 |
+
"epoch": 18.596011396011395,
|
| 114296 |
+
"grad_norm": 0.1765321046113968,
|
| 114297 |
+
"learning_rate": 6.5821007383646e-07,
|
| 114298 |
+
"loss": 0.5678,
|
| 114299 |
+
"step": 16327
|
| 114300 |
+
},
|
| 114301 |
+
{
|
| 114302 |
+
"epoch": 18.597150997150997,
|
| 114303 |
+
"grad_norm": 0.20969374477863312,
|
| 114304 |
+
"learning_rate": 6.571484115150211e-07,
|
| 114305 |
+
"loss": 0.682,
|
| 114306 |
+
"step": 16328
|
| 114307 |
+
},
|
| 114308 |
+
{
|
| 114309 |
+
"epoch": 18.5982905982906,
|
| 114310 |
+
"grad_norm": 0.1690845638513565,
|
| 114311 |
+
"learning_rate": 6.560875946877598e-07,
|
| 114312 |
+
"loss": 0.7952,
|
| 114313 |
+
"step": 16329
|
| 114314 |
+
},
|
| 114315 |
+
{
|
| 114316 |
+
"epoch": 18.5994301994302,
|
| 114317 |
+
"grad_norm": 0.19686806201934814,
|
| 114318 |
+
"learning_rate": 6.550276233915187e-07,
|
| 114319 |
+
"loss": 0.7186,
|
| 114320 |
+
"step": 16330
|
| 114321 |
+
},
|
| 114322 |
+
{
|
| 114323 |
+
"epoch": 18.6005698005698,
|
| 114324 |
+
"grad_norm": 0.22469277679920197,
|
| 114325 |
+
"learning_rate": 6.539684976631211e-07,
|
| 114326 |
+
"loss": 0.5949,
|
| 114327 |
+
"step": 16331
|
| 114328 |
+
},
|
| 114329 |
+
{
|
| 114330 |
+
"epoch": 18.601709401709403,
|
| 114331 |
+
"grad_norm": 0.19334806501865387,
|
| 114332 |
+
"learning_rate": 6.529102175393487e-07,
|
| 114333 |
+
"loss": 0.7002,
|
| 114334 |
+
"step": 16332
|
| 114335 |
+
},
|
| 114336 |
+
{
|
| 114337 |
+
"epoch": 18.602849002849002,
|
| 114338 |
+
"grad_norm": 0.19611862301826477,
|
| 114339 |
+
"learning_rate": 6.518527830569582e-07,
|
| 114340 |
+
"loss": 0.712,
|
| 114341 |
+
"step": 16333
|
| 114342 |
+
},
|
| 114343 |
+
{
|
| 114344 |
+
"epoch": 18.603988603988604,
|
| 114345 |
+
"grad_norm": 0.2791908383369446,
|
| 114346 |
+
"learning_rate": 6.507961942526785e-07,
|
| 114347 |
+
"loss": 0.3825,
|
| 114348 |
+
"step": 16334
|
| 114349 |
+
},
|
| 114350 |
+
{
|
| 114351 |
+
"epoch": 18.605128205128207,
|
| 114352 |
+
"grad_norm": 0.15559428930282593,
|
| 114353 |
+
"learning_rate": 6.497404511632111e-07,
|
| 114354 |
+
"loss": 0.668,
|
| 114355 |
+
"step": 16335
|
| 114356 |
+
},
|
| 114357 |
+
{
|
| 114358 |
+
"epoch": 18.606267806267805,
|
| 114359 |
+
"grad_norm": 0.1852877140045166,
|
| 114360 |
+
"learning_rate": 6.486855538252179e-07,
|
| 114361 |
+
"loss": 0.6352,
|
| 114362 |
+
"step": 16336
|
| 114363 |
+
},
|
| 114364 |
+
{
|
| 114365 |
+
"epoch": 18.607407407407408,
|
| 114366 |
+
"grad_norm": 0.17558401823043823,
|
| 114367 |
+
"learning_rate": 6.476315022753421e-07,
|
| 114368 |
+
"loss": 0.9307,
|
| 114369 |
+
"step": 16337
|
| 114370 |
+
},
|
| 114371 |
+
{
|
| 114372 |
+
"epoch": 18.60854700854701,
|
| 114373 |
+
"grad_norm": 0.1833890676498413,
|
| 114374 |
+
"learning_rate": 6.465782965501932e-07,
|
| 114375 |
+
"loss": 0.56,
|
| 114376 |
+
"step": 16338
|
| 114377 |
+
},
|
| 114378 |
+
{
|
| 114379 |
+
"epoch": 18.60968660968661,
|
| 114380 |
+
"grad_norm": 0.22029294073581696,
|
| 114381 |
+
"learning_rate": 6.455259366863531e-07,
|
| 114382 |
+
"loss": 0.5293,
|
| 114383 |
+
"step": 16339
|
| 114384 |
+
},
|
| 114385 |
+
{
|
| 114386 |
+
"epoch": 18.61082621082621,
|
| 114387 |
+
"grad_norm": 0.17233218252658844,
|
| 114388 |
+
"learning_rate": 6.44474422720373e-07,
|
| 114389 |
+
"loss": 0.6149,
|
| 114390 |
+
"step": 16340
|
| 114391 |
+
},
|
| 114392 |
+
{
|
| 114393 |
+
"epoch": 18.611965811965813,
|
| 114394 |
+
"grad_norm": 0.1834934502840042,
|
| 114395 |
+
"learning_rate": 6.434237546887684e-07,
|
| 114396 |
+
"loss": 0.4983,
|
| 114397 |
+
"step": 16341
|
| 114398 |
+
},
|
| 114399 |
+
{
|
| 114400 |
+
"epoch": 18.613105413105412,
|
| 114401 |
+
"grad_norm": 0.1694076955318451,
|
| 114402 |
+
"learning_rate": 6.423739326280459e-07,
|
| 114403 |
+
"loss": 0.6432,
|
| 114404 |
+
"step": 16342
|
| 114405 |
+
},
|
| 114406 |
+
{
|
| 114407 |
+
"epoch": 18.614245014245014,
|
| 114408 |
+
"grad_norm": 0.20694801211357117,
|
| 114409 |
+
"learning_rate": 6.413249565746543e-07,
|
| 114410 |
+
"loss": 0.7677,
|
| 114411 |
+
"step": 16343
|
| 114412 |
+
},
|
| 114413 |
+
{
|
| 114414 |
+
"epoch": 18.615384615384617,
|
| 114415 |
+
"grad_norm": 0.2359916716814041,
|
| 114416 |
+
"learning_rate": 6.402768265650367e-07,
|
| 114417 |
+
"loss": 0.5534,
|
| 114418 |
+
"step": 16344
|
| 114419 |
+
},
|
| 114420 |
+
{
|
| 114421 |
+
"epoch": 18.616524216524216,
|
| 114422 |
+
"grad_norm": 0.19315852224826813,
|
| 114423 |
+
"learning_rate": 6.392295426355916e-07,
|
| 114424 |
+
"loss": 0.6173,
|
| 114425 |
+
"step": 16345
|
| 114426 |
+
},
|
| 114427 |
+
{
|
| 114428 |
+
"epoch": 18.617663817663818,
|
| 114429 |
+
"grad_norm": 0.24828165769577026,
|
| 114430 |
+
"learning_rate": 6.381831048227011e-07,
|
| 114431 |
+
"loss": 0.4476,
|
| 114432 |
+
"step": 16346
|
| 114433 |
+
},
|
| 114434 |
+
{
|
| 114435 |
+
"epoch": 18.61880341880342,
|
| 114436 |
+
"grad_norm": 0.18524330854415894,
|
| 114437 |
+
"learning_rate": 6.371375131627055e-07,
|
| 114438 |
+
"loss": 0.5514,
|
| 114439 |
+
"step": 16347
|
| 114440 |
+
},
|
| 114441 |
+
{
|
| 114442 |
+
"epoch": 18.61994301994302,
|
| 114443 |
+
"grad_norm": 0.23572108149528503,
|
| 114444 |
+
"learning_rate": 6.360927676919204e-07,
|
| 114445 |
+
"loss": 0.6882,
|
| 114446 |
+
"step": 16348
|
| 114447 |
+
},
|
| 114448 |
+
{
|
| 114449 |
+
"epoch": 18.62108262108262,
|
| 114450 |
+
"grad_norm": 0.17911289632320404,
|
| 114451 |
+
"learning_rate": 6.35048868446636e-07,
|
| 114452 |
+
"loss": 0.8192,
|
| 114453 |
+
"step": 16349
|
| 114454 |
+
},
|
| 114455 |
+
{
|
| 114456 |
+
"epoch": 18.622222222222224,
|
| 114457 |
+
"grad_norm": 0.18745560944080353,
|
| 114458 |
+
"learning_rate": 6.340058154631096e-07,
|
| 114459 |
+
"loss": 0.7429,
|
| 114460 |
+
"step": 16350
|
| 114461 |
+
},
|
| 114462 |
+
{
|
| 114463 |
+
"epoch": 18.623361823361822,
|
| 114464 |
+
"grad_norm": 0.171620711684227,
|
| 114465 |
+
"learning_rate": 6.329636087775647e-07,
|
| 114466 |
+
"loss": 0.7539,
|
| 114467 |
+
"step": 16351
|
| 114468 |
+
},
|
| 114469 |
+
{
|
| 114470 |
+
"epoch": 18.624501424501425,
|
| 114471 |
+
"grad_norm": 0.18377149105072021,
|
| 114472 |
+
"learning_rate": 6.319222484262005e-07,
|
| 114473 |
+
"loss": 0.8062,
|
| 114474 |
+
"step": 16352
|
| 114475 |
+
},
|
| 114476 |
+
{
|
| 114477 |
+
"epoch": 18.625641025641027,
|
| 114478 |
+
"grad_norm": 0.17271770536899567,
|
| 114479 |
+
"learning_rate": 6.308817344451934e-07,
|
| 114480 |
+
"loss": 0.709,
|
| 114481 |
+
"step": 16353
|
| 114482 |
+
},
|
| 114483 |
+
{
|
| 114484 |
+
"epoch": 18.626780626780626,
|
| 114485 |
+
"grad_norm": 0.1912023425102234,
|
| 114486 |
+
"learning_rate": 6.298420668706812e-07,
|
| 114487 |
+
"loss": 0.6841,
|
| 114488 |
+
"step": 16354
|
| 114489 |
+
},
|
| 114490 |
+
{
|
| 114491 |
+
"epoch": 18.627920227920228,
|
| 114492 |
+
"grad_norm": 0.20034343004226685,
|
| 114493 |
+
"learning_rate": 6.288032457387683e-07,
|
| 114494 |
+
"loss": 0.5396,
|
| 114495 |
+
"step": 16355
|
| 114496 |
+
},
|
| 114497 |
+
{
|
| 114498 |
+
"epoch": 18.62905982905983,
|
| 114499 |
+
"grad_norm": 0.17258824408054352,
|
| 114500 |
+
"learning_rate": 6.277652710855397e-07,
|
| 114501 |
+
"loss": 0.8374,
|
| 114502 |
+
"step": 16356
|
| 114503 |
+
},
|
| 114504 |
+
{
|
| 114505 |
+
"epoch": 18.63019943019943,
|
| 114506 |
+
"grad_norm": 0.16941462457180023,
|
| 114507 |
+
"learning_rate": 6.2672814294705e-07,
|
| 114508 |
+
"loss": 0.6927,
|
| 114509 |
+
"step": 16357
|
| 114510 |
+
},
|
| 114511 |
+
{
|
| 114512 |
+
"epoch": 18.63133903133903,
|
| 114513 |
+
"grad_norm": 0.18177206814289093,
|
| 114514 |
+
"learning_rate": 6.256918613593176e-07,
|
| 114515 |
+
"loss": 0.7024,
|
| 114516 |
+
"step": 16358
|
| 114517 |
+
},
|
| 114518 |
+
{
|
| 114519 |
+
"epoch": 18.632478632478634,
|
| 114520 |
+
"grad_norm": 0.215084969997406,
|
| 114521 |
+
"learning_rate": 6.24656426358336e-07,
|
| 114522 |
+
"loss": 0.6746,
|
| 114523 |
+
"step": 16359
|
| 114524 |
+
},
|
| 114525 |
+
{
|
| 114526 |
+
"epoch": 18.633618233618233,
|
| 114527 |
+
"grad_norm": 0.18582645058631897,
|
| 114528 |
+
"learning_rate": 6.236218379800707e-07,
|
| 114529 |
+
"loss": 0.5369,
|
| 114530 |
+
"step": 16360
|
| 114531 |
+
},
|
| 114532 |
+
{
|
| 114533 |
+
"epoch": 18.634757834757835,
|
| 114534 |
+
"grad_norm": 0.17084524035453796,
|
| 114535 |
+
"learning_rate": 6.225880962604513e-07,
|
| 114536 |
+
"loss": 0.5692,
|
| 114537 |
+
"step": 16361
|
| 114538 |
+
},
|
| 114539 |
+
{
|
| 114540 |
+
"epoch": 18.635897435897437,
|
| 114541 |
+
"grad_norm": 0.2446059286594391,
|
| 114542 |
+
"learning_rate": 6.215552012353882e-07,
|
| 114543 |
+
"loss": 0.4709,
|
| 114544 |
+
"step": 16362
|
| 114545 |
+
},
|
| 114546 |
+
{
|
| 114547 |
+
"epoch": 18.637037037037036,
|
| 114548 |
+
"grad_norm": 0.17531663179397583,
|
| 114549 |
+
"learning_rate": 6.205231529407496e-07,
|
| 114550 |
+
"loss": 0.5219,
|
| 114551 |
+
"step": 16363
|
| 114552 |
+
},
|
| 114553 |
+
{
|
| 114554 |
+
"epoch": 18.63817663817664,
|
| 114555 |
+
"grad_norm": 0.17034143209457397,
|
| 114556 |
+
"learning_rate": 6.194919514123904e-07,
|
| 114557 |
+
"loss": 0.7795,
|
| 114558 |
+
"step": 16364
|
| 114559 |
+
},
|
| 114560 |
+
{
|
| 114561 |
+
"epoch": 18.63931623931624,
|
| 114562 |
+
"grad_norm": 0.16582253575325012,
|
| 114563 |
+
"learning_rate": 6.184615966861207e-07,
|
| 114564 |
+
"loss": 0.6171,
|
| 114565 |
+
"step": 16365
|
| 114566 |
+
},
|
| 114567 |
+
{
|
| 114568 |
+
"epoch": 18.64045584045584,
|
| 114569 |
+
"grad_norm": 0.18334414064884186,
|
| 114570 |
+
"learning_rate": 6.174320887977286e-07,
|
| 114571 |
+
"loss": 0.7847,
|
| 114572 |
+
"step": 16366
|
| 114573 |
+
},
|
| 114574 |
+
{
|
| 114575 |
+
"epoch": 18.64159544159544,
|
| 114576 |
+
"grad_norm": 0.22042441368103027,
|
| 114577 |
+
"learning_rate": 6.164034277829745e-07,
|
| 114578 |
+
"loss": 0.4481,
|
| 114579 |
+
"step": 16367
|
| 114580 |
+
},
|
| 114581 |
+
{
|
| 114582 |
+
"epoch": 18.642735042735044,
|
| 114583 |
+
"grad_norm": 0.18286308646202087,
|
| 114584 |
+
"learning_rate": 6.153756136775823e-07,
|
| 114585 |
+
"loss": 0.477,
|
| 114586 |
+
"step": 16368
|
| 114587 |
+
},
|
| 114588 |
+
{
|
| 114589 |
+
"epoch": 18.643874643874643,
|
| 114590 |
+
"grad_norm": 0.19711682200431824,
|
| 114591 |
+
"learning_rate": 6.143486465172571e-07,
|
| 114592 |
+
"loss": 0.7364,
|
| 114593 |
+
"step": 16369
|
| 114594 |
+
},
|
| 114595 |
+
{
|
| 114596 |
+
"epoch": 18.645014245014245,
|
| 114597 |
+
"grad_norm": 0.16850189864635468,
|
| 114598 |
+
"learning_rate": 6.133225263376591e-07,
|
| 114599 |
+
"loss": 0.6956,
|
| 114600 |
+
"step": 16370
|
| 114601 |
+
},
|
| 114602 |
+
{
|
| 114603 |
+
"epoch": 18.646153846153847,
|
| 114604 |
+
"grad_norm": 0.17246362566947937,
|
| 114605 |
+
"learning_rate": 6.122972531744347e-07,
|
| 114606 |
+
"loss": 0.7879,
|
| 114607 |
+
"step": 16371
|
| 114608 |
+
},
|
| 114609 |
+
{
|
| 114610 |
+
"epoch": 18.647293447293446,
|
| 114611 |
+
"grad_norm": 0.19572462141513824,
|
| 114612 |
+
"learning_rate": 6.112728270631946e-07,
|
| 114613 |
+
"loss": 0.8546,
|
| 114614 |
+
"step": 16372
|
| 114615 |
+
},
|
| 114616 |
+
{
|
| 114617 |
+
"epoch": 18.64843304843305,
|
| 114618 |
+
"grad_norm": 0.26401737332344055,
|
| 114619 |
+
"learning_rate": 6.102492480395183e-07,
|
| 114620 |
+
"loss": 0.4992,
|
| 114621 |
+
"step": 16373
|
| 114622 |
+
},
|
| 114623 |
+
{
|
| 114624 |
+
"epoch": 18.64957264957265,
|
| 114625 |
+
"grad_norm": 0.2246359884738922,
|
| 114626 |
+
"learning_rate": 6.092265161389527e-07,
|
| 114627 |
+
"loss": 0.5464,
|
| 114628 |
+
"step": 16374
|
| 114629 |
+
},
|
| 114630 |
+
{
|
| 114631 |
+
"epoch": 18.65071225071225,
|
| 114632 |
+
"grad_norm": 0.1910015493631363,
|
| 114633 |
+
"learning_rate": 6.082046313970302e-07,
|
| 114634 |
+
"loss": 0.8257,
|
| 114635 |
+
"step": 16375
|
| 114636 |
+
},
|
| 114637 |
+
{
|
| 114638 |
+
"epoch": 18.651851851851852,
|
| 114639 |
+
"grad_norm": 0.24764880537986755,
|
| 114640 |
+
"learning_rate": 6.071835938492393e-07,
|
| 114641 |
+
"loss": 0.4395,
|
| 114642 |
+
"step": 16376
|
| 114643 |
+
},
|
| 114644 |
+
{
|
| 114645 |
+
"epoch": 18.652991452991454,
|
| 114646 |
+
"grad_norm": 0.2275485396385193,
|
| 114647 |
+
"learning_rate": 6.061634035310404e-07,
|
| 114648 |
+
"loss": 0.5034,
|
| 114649 |
+
"step": 16377
|
| 114650 |
+
},
|
| 114651 |
+
{
|
| 114652 |
+
"epoch": 18.654131054131053,
|
| 114653 |
+
"grad_norm": 0.203792005777359,
|
| 114654 |
+
"learning_rate": 6.051440604778718e-07,
|
| 114655 |
+
"loss": 0.7497,
|
| 114656 |
+
"step": 16378
|
| 114657 |
+
},
|
| 114658 |
+
{
|
| 114659 |
+
"epoch": 18.655270655270655,
|
| 114660 |
+
"grad_norm": 0.18049485981464386,
|
| 114661 |
+
"learning_rate": 6.041255647251332e-07,
|
| 114662 |
+
"loss": 0.7349,
|
| 114663 |
+
"step": 16379
|
| 114664 |
+
},
|
| 114665 |
+
{
|
| 114666 |
+
"epoch": 18.656410256410258,
|
| 114667 |
+
"grad_norm": 0.18229711055755615,
|
| 114668 |
+
"learning_rate": 6.03107916308207e-07,
|
| 114669 |
+
"loss": 0.4229,
|
| 114670 |
+
"step": 16380
|
| 114671 |
+
},
|
| 114672 |
+
{
|
| 114673 |
+
"epoch": 18.657549857549856,
|
| 114674 |
+
"grad_norm": 0.18593524396419525,
|
| 114675 |
+
"learning_rate": 6.020911152624292e-07,
|
| 114676 |
+
"loss": 0.6194,
|
| 114677 |
+
"step": 16381
|
| 114678 |
+
},
|
| 114679 |
+
{
|
| 114680 |
+
"epoch": 18.65868945868946,
|
| 114681 |
+
"grad_norm": 0.22720669209957123,
|
| 114682 |
+
"learning_rate": 6.010751616231242e-07,
|
| 114683 |
+
"loss": 0.6662,
|
| 114684 |
+
"step": 16382
|
| 114685 |
+
},
|
| 114686 |
+
{
|
| 114687 |
+
"epoch": 18.65982905982906,
|
| 114688 |
+
"grad_norm": 0.2030264288187027,
|
| 114689 |
+
"learning_rate": 6.000600554255775e-07,
|
| 114690 |
+
"loss": 0.8845,
|
| 114691 |
+
"step": 16383
|
| 114692 |
+
},
|
| 114693 |
+
{
|
| 114694 |
+
"epoch": 18.66096866096866,
|
| 114695 |
+
"grad_norm": 0.22050747275352478,
|
| 114696 |
+
"learning_rate": 5.990457967050445e-07,
|
| 114697 |
+
"loss": 0.6346,
|
| 114698 |
+
"step": 16384
|
| 114699 |
+
},
|
| 114700 |
+
{
|
| 114701 |
+
"epoch": 18.662108262108262,
|
| 114702 |
+
"grad_norm": 0.18795745074748993,
|
| 114703 |
+
"learning_rate": 5.980323854967496e-07,
|
| 114704 |
+
"loss": 0.6945,
|
| 114705 |
+
"step": 16385
|
| 114706 |
+
},
|
| 114707 |
+
{
|
| 114708 |
+
"epoch": 18.663247863247864,
|
| 114709 |
+
"grad_norm": 0.2565062940120697,
|
| 114710 |
+
"learning_rate": 5.97019821835898e-07,
|
| 114711 |
+
"loss": 0.4727,
|
| 114712 |
+
"step": 16386
|
| 114713 |
+
},
|
| 114714 |
+
{
|
| 114715 |
+
"epoch": 18.664387464387463,
|
| 114716 |
+
"grad_norm": 0.182623028755188,
|
| 114717 |
+
"learning_rate": 5.960081057576589e-07,
|
| 114718 |
+
"loss": 0.7858,
|
| 114719 |
+
"step": 16387
|
| 114720 |
+
},
|
| 114721 |
+
{
|
| 114722 |
+
"epoch": 18.665527065527066,
|
| 114723 |
+
"grad_norm": 0.1975908875465393,
|
| 114724 |
+
"learning_rate": 5.949972372971679e-07,
|
| 114725 |
+
"loss": 0.6938,
|
| 114726 |
+
"step": 16388
|
| 114727 |
+
},
|
| 114728 |
+
{
|
| 114729 |
+
"epoch": 18.666666666666668,
|
| 114730 |
+
"grad_norm": 0.2245030254125595,
|
| 114731 |
+
"learning_rate": 5.939872164895388e-07,
|
| 114732 |
+
"loss": 0.7397,
|
| 114733 |
+
"step": 16389
|
| 114734 |
+
},
|
| 114735 |
+
{
|
| 114736 |
+
"epoch": 18.667806267806267,
|
| 114737 |
+
"grad_norm": 0.17188383638858795,
|
| 114738 |
+
"learning_rate": 5.929780433698462e-07,
|
| 114739 |
+
"loss": 0.9977,
|
| 114740 |
+
"step": 16390
|
| 114741 |
+
},
|
| 114742 |
+
{
|
| 114743 |
+
"epoch": 18.66894586894587,
|
| 114744 |
+
"grad_norm": 0.17740632593631744,
|
| 114745 |
+
"learning_rate": 5.919697179731482e-07,
|
| 114746 |
+
"loss": 0.6673,
|
| 114747 |
+
"step": 16391
|
| 114748 |
+
},
|
| 114749 |
+
{
|
| 114750 |
+
"epoch": 18.67008547008547,
|
| 114751 |
+
"grad_norm": 0.17257684469223022,
|
| 114752 |
+
"learning_rate": 5.909622403344667e-07,
|
| 114753 |
+
"loss": 0.695,
|
| 114754 |
+
"step": 16392
|
| 114755 |
+
},
|
| 114756 |
+
{
|
| 114757 |
+
"epoch": 18.67122507122507,
|
| 114758 |
+
"grad_norm": 0.21933795511722565,
|
| 114759 |
+
"learning_rate": 5.899556104887904e-07,
|
| 114760 |
+
"loss": 0.5065,
|
| 114761 |
+
"step": 16393
|
| 114762 |
+
},
|
| 114763 |
+
{
|
| 114764 |
+
"epoch": 18.672364672364672,
|
| 114765 |
+
"grad_norm": 0.19246655702590942,
|
| 114766 |
+
"learning_rate": 5.889498284710803e-07,
|
| 114767 |
+
"loss": 0.7339,
|
| 114768 |
+
"step": 16394
|
| 114769 |
+
},
|
| 114770 |
+
{
|
| 114771 |
+
"epoch": 18.673504273504275,
|
| 114772 |
+
"grad_norm": 0.19669175148010254,
|
| 114773 |
+
"learning_rate": 5.879448943162752e-07,
|
| 114774 |
+
"loss": 0.605,
|
| 114775 |
+
"step": 16395
|
| 114776 |
+
},
|
| 114777 |
+
{
|
| 114778 |
+
"epoch": 18.674643874643873,
|
| 114779 |
+
"grad_norm": 0.20934002101421356,
|
| 114780 |
+
"learning_rate": 5.869408080592775e-07,
|
| 114781 |
+
"loss": 0.663,
|
| 114782 |
+
"step": 16396
|
| 114783 |
+
},
|
| 114784 |
+
{
|
| 114785 |
+
"epoch": 18.675783475783476,
|
| 114786 |
+
"grad_norm": 0.2102241963148117,
|
| 114787 |
+
"learning_rate": 5.859375697349623e-07,
|
| 114788 |
+
"loss": 0.4298,
|
| 114789 |
+
"step": 16397
|
| 114790 |
+
},
|
| 114791 |
+
{
|
| 114792 |
+
"epoch": 18.676923076923078,
|
| 114793 |
+
"grad_norm": 0.1985064297914505,
|
| 114794 |
+
"learning_rate": 5.849351793781738e-07,
|
| 114795 |
+
"loss": 0.7757,
|
| 114796 |
+
"step": 16398
|
| 114797 |
+
},
|
| 114798 |
+
{
|
| 114799 |
+
"epoch": 18.678062678062677,
|
| 114800 |
+
"grad_norm": 0.17709557712078094,
|
| 114801 |
+
"learning_rate": 5.839336370237286e-07,
|
| 114802 |
+
"loss": 0.5256,
|
| 114803 |
+
"step": 16399
|
| 114804 |
+
},
|
| 114805 |
+
{
|
| 114806 |
+
"epoch": 18.67920227920228,
|
| 114807 |
+
"grad_norm": 0.19953429698944092,
|
| 114808 |
+
"learning_rate": 5.829329427064129e-07,
|
| 114809 |
+
"loss": 0.8831,
|
| 114810 |
+
"step": 16400
|
| 114811 |
+
},
|
| 114812 |
+
{
|
| 114813 |
+
"epoch": 18.68034188034188,
|
| 114814 |
+
"grad_norm": 0.17611843347549438,
|
| 114815 |
+
"learning_rate": 5.81933096460982e-07,
|
| 114816 |
+
"loss": 0.4334,
|
| 114817 |
+
"step": 16401
|
| 114818 |
+
},
|
| 114819 |
+
{
|
| 114820 |
+
"epoch": 18.68148148148148,
|
| 114821 |
+
"grad_norm": 0.19796553254127502,
|
| 114822 |
+
"learning_rate": 5.809340983221639e-07,
|
| 114823 |
+
"loss": 0.6072,
|
| 114824 |
+
"step": 16402
|
| 114825 |
+
},
|
| 114826 |
+
{
|
| 114827 |
+
"epoch": 18.682621082621083,
|
| 114828 |
+
"grad_norm": 0.1590607613325119,
|
| 114829 |
+
"learning_rate": 5.799359483246614e-07,
|
| 114830 |
+
"loss": 0.6833,
|
| 114831 |
+
"step": 16403
|
| 114832 |
+
},
|
| 114833 |
+
{
|
| 114834 |
+
"epoch": 18.683760683760685,
|
| 114835 |
+
"grad_norm": 0.19662630558013916,
|
| 114836 |
+
"learning_rate": 5.789386465031354e-07,
|
| 114837 |
+
"loss": 0.5794,
|
| 114838 |
+
"step": 16404
|
| 114839 |
+
},
|
| 114840 |
+
{
|
| 114841 |
+
"epoch": 18.684900284900284,
|
| 114842 |
+
"grad_norm": 0.20472319424152374,
|
| 114843 |
+
"learning_rate": 5.779421928922279e-07,
|
| 114844 |
+
"loss": 0.6026,
|
| 114845 |
+
"step": 16405
|
| 114846 |
+
},
|
| 114847 |
+
{
|
| 114848 |
+
"epoch": 18.686039886039886,
|
| 114849 |
+
"grad_norm": 0.2505066990852356,
|
| 114850 |
+
"learning_rate": 5.7694658752655e-07,
|
| 114851 |
+
"loss": 0.6035,
|
| 114852 |
+
"step": 16406
|
| 114853 |
+
},
|
| 114854 |
+
{
|
| 114855 |
+
"epoch": 18.68717948717949,
|
| 114856 |
+
"grad_norm": 0.16703948378562927,
|
| 114857 |
+
"learning_rate": 5.759518304406797e-07,
|
| 114858 |
+
"loss": 0.7095,
|
| 114859 |
+
"step": 16407
|
| 114860 |
+
},
|
| 114861 |
+
{
|
| 114862 |
+
"epoch": 18.688319088319087,
|
| 114863 |
+
"grad_norm": 0.20619726181030273,
|
| 114864 |
+
"learning_rate": 5.749579216691697e-07,
|
| 114865 |
+
"loss": 0.653,
|
| 114866 |
+
"step": 16408
|
| 114867 |
+
},
|
| 114868 |
+
{
|
| 114869 |
+
"epoch": 18.68945868945869,
|
| 114870 |
+
"grad_norm": 0.21306900680065155,
|
| 114871 |
+
"learning_rate": 5.739648612465398e-07,
|
| 114872 |
+
"loss": 0.7377,
|
| 114873 |
+
"step": 16409
|
| 114874 |
+
},
|
| 114875 |
+
{
|
| 114876 |
+
"epoch": 18.69059829059829,
|
| 114877 |
+
"grad_norm": 0.228653684258461,
|
| 114878 |
+
"learning_rate": 5.729726492072846e-07,
|
| 114879 |
+
"loss": 0.6316,
|
| 114880 |
+
"step": 16410
|
| 114881 |
+
},
|
| 114882 |
+
{
|
| 114883 |
+
"epoch": 18.69173789173789,
|
| 114884 |
+
"grad_norm": 0.25274547934532166,
|
| 114885 |
+
"learning_rate": 5.719812855858625e-07,
|
| 114886 |
+
"loss": 0.5878,
|
| 114887 |
+
"step": 16411
|
| 114888 |
+
},
|
| 114889 |
+
{
|
| 114890 |
+
"epoch": 18.692877492877493,
|
| 114891 |
+
"grad_norm": 0.21942993998527527,
|
| 114892 |
+
"learning_rate": 5.709907704167073e-07,
|
| 114893 |
+
"loss": 0.478,
|
| 114894 |
+
"step": 16412
|
| 114895 |
+
},
|
| 114896 |
+
{
|
| 114897 |
+
"epoch": 18.694017094017095,
|
| 114898 |
+
"grad_norm": 0.19420869648456573,
|
| 114899 |
+
"learning_rate": 5.700011037342217e-07,
|
| 114900 |
+
"loss": 0.7059,
|
| 114901 |
+
"step": 16413
|
| 114902 |
+
},
|
| 114903 |
+
{
|
| 114904 |
+
"epoch": 18.695156695156694,
|
| 114905 |
+
"grad_norm": 0.20585720241069794,
|
| 114906 |
+
"learning_rate": 5.690122855727842e-07,
|
| 114907 |
+
"loss": 0.6092,
|
| 114908 |
+
"step": 16414
|
| 114909 |
+
},
|
| 114910 |
+
{
|
| 114911 |
+
"epoch": 18.696296296296296,
|
| 114912 |
+
"grad_norm": 0.1916133612394333,
|
| 114913 |
+
"learning_rate": 5.680243159667309e-07,
|
| 114914 |
+
"loss": 0.7625,
|
| 114915 |
+
"step": 16415
|
| 114916 |
+
},
|
| 114917 |
+
{
|
| 114918 |
+
"epoch": 18.6974358974359,
|
| 114919 |
+
"grad_norm": 0.19194625318050385,
|
| 114920 |
+
"learning_rate": 5.670371949503845e-07,
|
| 114921 |
+
"loss": 0.5937,
|
| 114922 |
+
"step": 16416
|
| 114923 |
+
},
|
| 114924 |
+
{
|
| 114925 |
+
"epoch": 18.698575498575497,
|
| 114926 |
+
"grad_norm": 0.21500788629055023,
|
| 114927 |
+
"learning_rate": 5.660509225580229e-07,
|
| 114928 |
+
"loss": 0.6049,
|
| 114929 |
+
"step": 16417
|
| 114930 |
+
},
|
| 114931 |
+
{
|
| 114932 |
+
"epoch": 18.6997150997151,
|
| 114933 |
+
"grad_norm": 0.19294331967830658,
|
| 114934 |
+
"learning_rate": 5.650654988239107e-07,
|
| 114935 |
+
"loss": 0.5393,
|
| 114936 |
+
"step": 16418
|
| 114937 |
+
},
|
| 114938 |
+
{
|
| 114939 |
+
"epoch": 18.700854700854702,
|
| 114940 |
+
"grad_norm": 0.20396853983402252,
|
| 114941 |
+
"learning_rate": 5.640809237822675e-07,
|
| 114942 |
+
"loss": 0.6981,
|
| 114943 |
+
"step": 16419
|
| 114944 |
+
},
|
| 114945 |
+
{
|
| 114946 |
+
"epoch": 18.7019943019943,
|
| 114947 |
+
"grad_norm": 0.15139256417751312,
|
| 114948 |
+
"learning_rate": 5.630971974672938e-07,
|
| 114949 |
+
"loss": 0.5209,
|
| 114950 |
+
"step": 16420
|
| 114951 |
+
},
|
| 114952 |
+
{
|
| 114953 |
+
"epoch": 18.703133903133903,
|
| 114954 |
+
"grad_norm": 0.17322292923927307,
|
| 114955 |
+
"learning_rate": 5.621143199131567e-07,
|
| 114956 |
+
"loss": 0.7695,
|
| 114957 |
+
"step": 16421
|
| 114958 |
+
},
|
| 114959 |
+
{
|
| 114960 |
+
"epoch": 18.704273504273505,
|
| 114961 |
+
"grad_norm": 0.21698355674743652,
|
| 114962 |
+
"learning_rate": 5.611322911539957e-07,
|
| 114963 |
+
"loss": 0.6869,
|
| 114964 |
+
"step": 16422
|
| 114965 |
+
},
|
| 114966 |
+
{
|
| 114967 |
+
"epoch": 18.705413105413104,
|
| 114968 |
+
"grad_norm": 0.1870664656162262,
|
| 114969 |
+
"learning_rate": 5.601511112239139e-07,
|
| 114970 |
+
"loss": 0.7331,
|
| 114971 |
+
"step": 16423
|
| 114972 |
+
},
|
| 114973 |
+
{
|
| 114974 |
+
"epoch": 18.706552706552706,
|
| 114975 |
+
"grad_norm": 0.16194890439510345,
|
| 114976 |
+
"learning_rate": 5.591707801569951e-07,
|
| 114977 |
+
"loss": 0.8158,
|
| 114978 |
+
"step": 16424
|
| 114979 |
+
},
|
| 114980 |
+
{
|
| 114981 |
+
"epoch": 18.70769230769231,
|
| 114982 |
+
"grad_norm": 0.1954636424779892,
|
| 114983 |
+
"learning_rate": 5.581912979872872e-07,
|
| 114984 |
+
"loss": 0.6078,
|
| 114985 |
+
"step": 16425
|
| 114986 |
+
},
|
| 114987 |
+
{
|
| 114988 |
+
"epoch": 18.708831908831907,
|
| 114989 |
+
"grad_norm": 0.20802627503871918,
|
| 114990 |
+
"learning_rate": 5.572126647488129e-07,
|
| 114991 |
+
"loss": 0.4431,
|
| 114992 |
+
"step": 16426
|
| 114993 |
+
},
|
| 114994 |
+
{
|
| 114995 |
+
"epoch": 18.70997150997151,
|
| 114996 |
+
"grad_norm": 0.19310083985328674,
|
| 114997 |
+
"learning_rate": 5.562348804755562e-07,
|
| 114998 |
+
"loss": 0.5396,
|
| 114999 |
+
"step": 16427
|
| 115000 |
+
},
|
| 115001 |
+
{
|
| 115002 |
+
"epoch": 18.711111111111112,
|
| 115003 |
+
"grad_norm": 0.19261355698108673,
|
| 115004 |
+
"learning_rate": 5.552579452014872e-07,
|
| 115005 |
+
"loss": 0.5911,
|
| 115006 |
+
"step": 16428
|
| 115007 |
+
},
|
| 115008 |
+
{
|
| 115009 |
+
"epoch": 18.71225071225071,
|
| 115010 |
+
"grad_norm": 0.1854696422815323,
|
| 115011 |
+
"learning_rate": 5.542818589605287e-07,
|
| 115012 |
+
"loss": 0.7231,
|
| 115013 |
+
"step": 16429
|
| 115014 |
+
},
|
| 115015 |
+
{
|
| 115016 |
+
"epoch": 18.713390313390313,
|
| 115017 |
+
"grad_norm": 0.23579032719135284,
|
| 115018 |
+
"learning_rate": 5.533066217865896e-07,
|
| 115019 |
+
"loss": 0.7758,
|
| 115020 |
+
"step": 16430
|
| 115021 |
+
},
|
| 115022 |
+
{
|
| 115023 |
+
"epoch": 18.714529914529916,
|
| 115024 |
+
"grad_norm": 0.16397906839847565,
|
| 115025 |
+
"learning_rate": 5.5233223371354e-07,
|
| 115026 |
+
"loss": 0.7157,
|
| 115027 |
+
"step": 16431
|
| 115028 |
+
},
|
| 115029 |
+
{
|
| 115030 |
+
"epoch": 18.715669515669514,
|
| 115031 |
+
"grad_norm": 0.19662268459796906,
|
| 115032 |
+
"learning_rate": 5.513586947752225e-07,
|
| 115033 |
+
"loss": 0.6233,
|
| 115034 |
+
"step": 16432
|
| 115035 |
+
},
|
| 115036 |
+
{
|
| 115037 |
+
"epoch": 18.716809116809117,
|
| 115038 |
+
"grad_norm": 0.22158661484718323,
|
| 115039 |
+
"learning_rate": 5.503860050054571e-07,
|
| 115040 |
+
"loss": 0.7254,
|
| 115041 |
+
"step": 16433
|
| 115042 |
+
},
|
| 115043 |
+
{
|
| 115044 |
+
"epoch": 18.71794871794872,
|
| 115045 |
+
"grad_norm": 0.17012442648410797,
|
| 115046 |
+
"learning_rate": 5.494141644380168e-07,
|
| 115047 |
+
"loss": 0.6851,
|
| 115048 |
+
"step": 16434
|
| 115049 |
+
},
|
| 115050 |
+
{
|
| 115051 |
+
"epoch": 18.719088319088318,
|
| 115052 |
+
"grad_norm": 0.19285432994365692,
|
| 115053 |
+
"learning_rate": 5.484431731066636e-07,
|
| 115054 |
+
"loss": 0.8449,
|
| 115055 |
+
"step": 16435
|
| 115056 |
+
},
|
| 115057 |
+
{
|
| 115058 |
+
"epoch": 18.72022792022792,
|
| 115059 |
+
"grad_norm": 0.20975373685359955,
|
| 115060 |
+
"learning_rate": 5.474730310451203e-07,
|
| 115061 |
+
"loss": 0.5786,
|
| 115062 |
+
"step": 16436
|
| 115063 |
+
},
|
| 115064 |
+
{
|
| 115065 |
+
"epoch": 18.721367521367522,
|
| 115066 |
+
"grad_norm": 0.18161320686340332,
|
| 115067 |
+
"learning_rate": 5.465037382870853e-07,
|
| 115068 |
+
"loss": 0.4212,
|
| 115069 |
+
"step": 16437
|
| 115070 |
+
},
|
| 115071 |
+
{
|
| 115072 |
+
"epoch": 18.72250712250712,
|
| 115073 |
+
"grad_norm": 0.20052970945835114,
|
| 115074 |
+
"learning_rate": 5.455352948662202e-07,
|
| 115075 |
+
"loss": 0.5507,
|
| 115076 |
+
"step": 16438
|
| 115077 |
+
},
|
| 115078 |
+
{
|
| 115079 |
+
"epoch": 18.723646723646723,
|
| 115080 |
+
"grad_norm": 0.30297166109085083,
|
| 115081 |
+
"learning_rate": 5.445677008161648e-07,
|
| 115082 |
+
"loss": 0.5945,
|
| 115083 |
+
"step": 16439
|
| 115084 |
+
},
|
| 115085 |
+
{
|
| 115086 |
+
"epoch": 18.724786324786326,
|
| 115087 |
+
"grad_norm": 0.15235814452171326,
|
| 115088 |
+
"learning_rate": 5.436009561705258e-07,
|
| 115089 |
+
"loss": 0.6678,
|
| 115090 |
+
"step": 16440
|
| 115091 |
+
},
|
| 115092 |
+
{
|
| 115093 |
+
"epoch": 18.725925925925925,
|
| 115094 |
+
"grad_norm": 0.15892931818962097,
|
| 115095 |
+
"learning_rate": 5.426350609628817e-07,
|
| 115096 |
+
"loss": 0.664,
|
| 115097 |
+
"step": 16441
|
| 115098 |
+
},
|
| 115099 |
+
{
|
| 115100 |
+
"epoch": 18.727065527065527,
|
| 115101 |
+
"grad_norm": 0.2232954055070877,
|
| 115102 |
+
"learning_rate": 5.416700152267778e-07,
|
| 115103 |
+
"loss": 0.6181,
|
| 115104 |
+
"step": 16442
|
| 115105 |
+
},
|
| 115106 |
+
{
|
| 115107 |
+
"epoch": 18.72820512820513,
|
| 115108 |
+
"grad_norm": 0.20584240555763245,
|
| 115109 |
+
"learning_rate": 5.407058189957376e-07,
|
| 115110 |
+
"loss": 0.4324,
|
| 115111 |
+
"step": 16443
|
| 115112 |
+
},
|
| 115113 |
+
{
|
| 115114 |
+
"epoch": 18.729344729344728,
|
| 115115 |
+
"grad_norm": 0.18428319692611694,
|
| 115116 |
+
"learning_rate": 5.39742472303248e-07,
|
| 115117 |
+
"loss": 0.5289,
|
| 115118 |
+
"step": 16444
|
| 115119 |
+
},
|
| 115120 |
+
{
|
| 115121 |
+
"epoch": 18.73048433048433,
|
| 115122 |
+
"grad_norm": 0.24277548491954803,
|
| 115123 |
+
"learning_rate": 5.387799751827682e-07,
|
| 115124 |
+
"loss": 0.544,
|
| 115125 |
+
"step": 16445
|
| 115126 |
+
},
|
| 115127 |
+
{
|
| 115128 |
+
"epoch": 18.731623931623933,
|
| 115129 |
+
"grad_norm": 0.2018672227859497,
|
| 115130 |
+
"learning_rate": 5.378183276677273e-07,
|
| 115131 |
+
"loss": 0.7585,
|
| 115132 |
+
"step": 16446
|
| 115133 |
+
},
|
| 115134 |
+
{
|
| 115135 |
+
"epoch": 18.73276353276353,
|
| 115136 |
+
"grad_norm": 0.18324849009513855,
|
| 115137 |
+
"learning_rate": 5.368575297915262e-07,
|
| 115138 |
+
"loss": 0.6266,
|
| 115139 |
+
"step": 16447
|
| 115140 |
+
},
|
| 115141 |
+
{
|
| 115142 |
+
"epoch": 18.733903133903134,
|
| 115143 |
+
"grad_norm": 0.17677345871925354,
|
| 115144 |
+
"learning_rate": 5.358975815875411e-07,
|
| 115145 |
+
"loss": 0.5818,
|
| 115146 |
+
"step": 16448
|
| 115147 |
+
},
|
| 115148 |
+
{
|
| 115149 |
+
"epoch": 18.735042735042736,
|
| 115150 |
+
"grad_norm": 0.1939893662929535,
|
| 115151 |
+
"learning_rate": 5.349384830891035e-07,
|
| 115152 |
+
"loss": 0.4949,
|
| 115153 |
+
"step": 16449
|
| 115154 |
+
},
|
| 115155 |
+
{
|
| 115156 |
+
"epoch": 18.736182336182335,
|
| 115157 |
+
"grad_norm": 0.18341238796710968,
|
| 115158 |
+
"learning_rate": 5.33980234329537e-07,
|
| 115159 |
+
"loss": 0.6179,
|
| 115160 |
+
"step": 16450
|
| 115161 |
+
},
|
| 115162 |
+
{
|
| 115163 |
+
"epoch": 18.737321937321937,
|
| 115164 |
+
"grad_norm": 0.22697743773460388,
|
| 115165 |
+
"learning_rate": 5.330228353421147e-07,
|
| 115166 |
+
"loss": 0.7024,
|
| 115167 |
+
"step": 16451
|
| 115168 |
+
},
|
| 115169 |
+
{
|
| 115170 |
+
"epoch": 18.73846153846154,
|
| 115171 |
+
"grad_norm": 0.18950581550598145,
|
| 115172 |
+
"learning_rate": 5.320662861600962e-07,
|
| 115173 |
+
"loss": 0.7092,
|
| 115174 |
+
"step": 16452
|
| 115175 |
+
},
|
| 115176 |
+
{
|
| 115177 |
+
"epoch": 18.739601139601138,
|
| 115178 |
+
"grad_norm": 0.1713227927684784,
|
| 115179 |
+
"learning_rate": 5.311105868166994e-07,
|
| 115180 |
+
"loss": 0.8411,
|
| 115181 |
+
"step": 16453
|
| 115182 |
+
},
|
| 115183 |
+
{
|
| 115184 |
+
"epoch": 18.74074074074074,
|
| 115185 |
+
"grad_norm": 0.15473496913909912,
|
| 115186 |
+
"learning_rate": 5.301557373451255e-07,
|
| 115187 |
+
"loss": 0.7369,
|
| 115188 |
+
"step": 16454
|
| 115189 |
+
},
|
| 115190 |
+
{
|
| 115191 |
+
"epoch": 18.741880341880343,
|
| 115192 |
+
"grad_norm": 0.2119859904050827,
|
| 115193 |
+
"learning_rate": 5.292017377785341e-07,
|
| 115194 |
+
"loss": 0.6594,
|
| 115195 |
+
"step": 16455
|
| 115196 |
+
},
|
| 115197 |
+
{
|
| 115198 |
+
"epoch": 18.74301994301994,
|
| 115199 |
+
"grad_norm": 0.18900814652442932,
|
| 115200 |
+
"learning_rate": 5.282485881500626e-07,
|
| 115201 |
+
"loss": 0.7711,
|
| 115202 |
+
"step": 16456
|
| 115203 |
+
},
|
| 115204 |
+
{
|
| 115205 |
+
"epoch": 18.744159544159544,
|
| 115206 |
+
"grad_norm": 0.2021235227584839,
|
| 115207 |
+
"learning_rate": 5.272962884928123e-07,
|
| 115208 |
+
"loss": 0.5514,
|
| 115209 |
+
"step": 16457
|
| 115210 |
+
},
|
| 115211 |
+
{
|
| 115212 |
+
"epoch": 18.745299145299146,
|
| 115213 |
+
"grad_norm": 0.1852232962846756,
|
| 115214 |
+
"learning_rate": 5.263448388398622e-07,
|
| 115215 |
+
"loss": 0.7873,
|
| 115216 |
+
"step": 16458
|
| 115217 |
+
},
|
| 115218 |
+
{
|
| 115219 |
+
"epoch": 18.746438746438745,
|
| 115220 |
+
"grad_norm": 0.20583923161029816,
|
| 115221 |
+
"learning_rate": 5.253942392242639e-07,
|
| 115222 |
+
"loss": 0.5979,
|
| 115223 |
+
"step": 16459
|
| 115224 |
+
},
|
| 115225 |
+
{
|
| 115226 |
+
"epoch": 18.747578347578347,
|
| 115227 |
+
"grad_norm": 0.21064208447933197,
|
| 115228 |
+
"learning_rate": 5.24444489679024e-07,
|
| 115229 |
+
"loss": 0.6498,
|
| 115230 |
+
"step": 16460
|
| 115231 |
+
},
|
| 115232 |
+
{
|
| 115233 |
+
"epoch": 18.74871794871795,
|
| 115234 |
+
"grad_norm": 0.227210134267807,
|
| 115235 |
+
"learning_rate": 5.234955902371358e-07,
|
| 115236 |
+
"loss": 0.5567,
|
| 115237 |
+
"step": 16461
|
| 115238 |
+
},
|
| 115239 |
+
{
|
| 115240 |
+
"epoch": 18.74985754985755,
|
| 115241 |
+
"grad_norm": 0.1994452029466629,
|
| 115242 |
+
"learning_rate": 5.225475409315561e-07,
|
| 115243 |
+
"loss": 0.4754,
|
| 115244 |
+
"step": 16462
|
| 115245 |
+
},
|
| 115246 |
+
{
|
| 115247 |
+
"epoch": 18.75099715099715,
|
| 115248 |
+
"grad_norm": 0.17867498099803925,
|
| 115249 |
+
"learning_rate": 5.216003417952142e-07,
|
| 115250 |
+
"loss": 0.92,
|
| 115251 |
+
"step": 16463
|
| 115252 |
+
},
|
| 115253 |
+
{
|
| 115254 |
+
"epoch": 18.752136752136753,
|
| 115255 |
+
"grad_norm": 0.19446507096290588,
|
| 115256 |
+
"learning_rate": 5.20653992861006e-07,
|
| 115257 |
+
"loss": 0.5061,
|
| 115258 |
+
"step": 16464
|
| 115259 |
+
},
|
| 115260 |
+
{
|
| 115261 |
+
"epoch": 18.753276353276352,
|
| 115262 |
+
"grad_norm": 0.2091256082057953,
|
| 115263 |
+
"learning_rate": 5.197084941618052e-07,
|
| 115264 |
+
"loss": 0.7883,
|
| 115265 |
+
"step": 16465
|
| 115266 |
+
},
|
| 115267 |
+
{
|
| 115268 |
+
"epoch": 18.754415954415954,
|
| 115269 |
+
"grad_norm": 0.18889965116977692,
|
| 115270 |
+
"learning_rate": 5.187638457304495e-07,
|
| 115271 |
+
"loss": 0.6264,
|
| 115272 |
+
"step": 16466
|
| 115273 |
+
},
|
| 115274 |
+
{
|
| 115275 |
+
"epoch": 18.755555555555556,
|
| 115276 |
+
"grad_norm": 0.17998528480529785,
|
| 115277 |
+
"learning_rate": 5.178200475997513e-07,
|
| 115278 |
+
"loss": 0.6503,
|
| 115279 |
+
"step": 16467
|
| 115280 |
+
},
|
| 115281 |
+
{
|
| 115282 |
+
"epoch": 18.756695156695155,
|
| 115283 |
+
"grad_norm": 0.21396511793136597,
|
| 115284 |
+
"learning_rate": 5.168770998024874e-07,
|
| 115285 |
+
"loss": 0.51,
|
| 115286 |
+
"step": 16468
|
| 115287 |
+
},
|
| 115288 |
+
{
|
| 115289 |
+
"epoch": 18.757834757834758,
|
| 115290 |
+
"grad_norm": 0.17627449333667755,
|
| 115291 |
+
"learning_rate": 5.159350023714094e-07,
|
| 115292 |
+
"loss": 0.6077,
|
| 115293 |
+
"step": 16469
|
| 115294 |
+
},
|
| 115295 |
+
{
|
| 115296 |
+
"epoch": 18.75897435897436,
|
| 115297 |
+
"grad_norm": 0.16725680232048035,
|
| 115298 |
+
"learning_rate": 5.149937553392409e-07,
|
| 115299 |
+
"loss": 0.6504,
|
| 115300 |
+
"step": 16470
|
| 115301 |
+
},
|
| 115302 |
+
{
|
| 115303 |
+
"epoch": 18.76011396011396,
|
| 115304 |
+
"grad_norm": 0.2420182079076767,
|
| 115305 |
+
"learning_rate": 5.140533587386753e-07,
|
| 115306 |
+
"loss": 0.8192,
|
| 115307 |
+
"step": 16471
|
| 115308 |
+
},
|
| 115309 |
+
{
|
| 115310 |
+
"epoch": 18.76125356125356,
|
| 115311 |
+
"grad_norm": 0.25417861342430115,
|
| 115312 |
+
"learning_rate": 5.131138126023699e-07,
|
| 115313 |
+
"loss": 0.62,
|
| 115314 |
+
"step": 16472
|
| 115315 |
+
},
|
| 115316 |
+
{
|
| 115317 |
+
"epoch": 18.762393162393163,
|
| 115318 |
+
"grad_norm": 0.2393779456615448,
|
| 115319 |
+
"learning_rate": 5.121751169629596e-07,
|
| 115320 |
+
"loss": 0.3951,
|
| 115321 |
+
"step": 16473
|
| 115322 |
+
},
|
| 115323 |
+
{
|
| 115324 |
+
"epoch": 18.763532763532762,
|
| 115325 |
+
"grad_norm": 0.17460033297538757,
|
| 115326 |
+
"learning_rate": 5.112372718530545e-07,
|
| 115327 |
+
"loss": 0.745,
|
| 115328 |
+
"step": 16474
|
| 115329 |
+
},
|
| 115330 |
+
{
|
| 115331 |
+
"epoch": 18.764672364672364,
|
| 115332 |
+
"grad_norm": 0.18978163599967957,
|
| 115333 |
+
"learning_rate": 5.103002773052201e-07,
|
| 115334 |
+
"loss": 0.7875,
|
| 115335 |
+
"step": 16475
|
| 115336 |
+
},
|
| 115337 |
+
{
|
| 115338 |
+
"epoch": 18.765811965811967,
|
| 115339 |
+
"grad_norm": 0.18757495284080505,
|
| 115340 |
+
"learning_rate": 5.093641333520055e-07,
|
| 115341 |
+
"loss": 0.7697,
|
| 115342 |
+
"step": 16476
|
| 115343 |
+
},
|
| 115344 |
+
{
|
| 115345 |
+
"epoch": 18.766951566951565,
|
| 115346 |
+
"grad_norm": 0.16876503825187683,
|
| 115347 |
+
"learning_rate": 5.084288400259235e-07,
|
| 115348 |
+
"loss": 0.9017,
|
| 115349 |
+
"step": 16477
|
| 115350 |
+
},
|
| 115351 |
+
{
|
| 115352 |
+
"epoch": 18.768091168091168,
|
| 115353 |
+
"grad_norm": 0.18147236108779907,
|
| 115354 |
+
"learning_rate": 5.074943973594621e-07,
|
| 115355 |
+
"loss": 0.9934,
|
| 115356 |
+
"step": 16478
|
| 115357 |
+
},
|
| 115358 |
+
{
|
| 115359 |
+
"epoch": 18.76923076923077,
|
| 115360 |
+
"grad_norm": 0.20209896564483643,
|
| 115361 |
+
"learning_rate": 5.065608053850701e-07,
|
| 115362 |
+
"loss": 0.7274,
|
| 115363 |
+
"step": 16479
|
| 115364 |
+
},
|
| 115365 |
+
{
|
| 115366 |
+
"epoch": 18.77037037037037,
|
| 115367 |
+
"grad_norm": 0.9826755523681641,
|
| 115368 |
+
"learning_rate": 5.056280641351829e-07,
|
| 115369 |
+
"loss": 0.6857,
|
| 115370 |
+
"step": 16480
|
| 115371 |
+
},
|
| 115372 |
+
{
|
| 115373 |
+
"epoch": 18.77150997150997,
|
| 115374 |
+
"grad_norm": 0.2170691341161728,
|
| 115375 |
+
"learning_rate": 5.046961736421885e-07,
|
| 115376 |
+
"loss": 0.6784,
|
| 115377 |
+
"step": 16481
|
| 115378 |
+
},
|
| 115379 |
+
{
|
| 115380 |
+
"epoch": 18.772649572649573,
|
| 115381 |
+
"grad_norm": 0.20924778282642365,
|
| 115382 |
+
"learning_rate": 5.037651339384636e-07,
|
| 115383 |
+
"loss": 0.6677,
|
| 115384 |
+
"step": 16482
|
| 115385 |
+
},
|
| 115386 |
+
{
|
| 115387 |
+
"epoch": 18.773789173789172,
|
| 115388 |
+
"grad_norm": 0.24490778148174286,
|
| 115389 |
+
"learning_rate": 5.02834945056338e-07,
|
| 115390 |
+
"loss": 0.6486,
|
| 115391 |
+
"step": 16483
|
| 115392 |
+
},
|
| 115393 |
+
{
|
| 115394 |
+
"epoch": 18.774928774928775,
|
| 115395 |
+
"grad_norm": 0.20174965262413025,
|
| 115396 |
+
"learning_rate": 5.019056070281192e-07,
|
| 115397 |
+
"loss": 0.4965,
|
| 115398 |
+
"step": 16484
|
| 115399 |
+
},
|
| 115400 |
+
{
|
| 115401 |
+
"epoch": 18.776068376068377,
|
| 115402 |
+
"grad_norm": 0.17393049597740173,
|
| 115403 |
+
"learning_rate": 5.009771198860925e-07,
|
| 115404 |
+
"loss": 0.8133,
|
| 115405 |
+
"step": 16485
|
| 115406 |
+
},
|
| 115407 |
+
{
|
| 115408 |
+
"epoch": 18.777207977207976,
|
| 115409 |
+
"grad_norm": 0.2102983295917511,
|
| 115410 |
+
"learning_rate": 5.000494836625013e-07,
|
| 115411 |
+
"loss": 0.6405,
|
| 115412 |
+
"step": 16486
|
| 115413 |
+
},
|
| 115414 |
+
{
|
| 115415 |
+
"epoch": 18.778347578347578,
|
| 115416 |
+
"grad_norm": 0.20062249898910522,
|
| 115417 |
+
"learning_rate": 4.991226983895675e-07,
|
| 115418 |
+
"loss": 0.7842,
|
| 115419 |
+
"step": 16487
|
| 115420 |
+
},
|
| 115421 |
+
{
|
| 115422 |
+
"epoch": 18.77948717948718,
|
| 115423 |
+
"grad_norm": 0.19834795594215393,
|
| 115424 |
+
"learning_rate": 4.981967640994789e-07,
|
| 115425 |
+
"loss": 0.7031,
|
| 115426 |
+
"step": 16488
|
| 115427 |
+
},
|
| 115428 |
+
{
|
| 115429 |
+
"epoch": 18.78062678062678,
|
| 115430 |
+
"grad_norm": 0.20465931296348572,
|
| 115431 |
+
"learning_rate": 4.972716808244016e-07,
|
| 115432 |
+
"loss": 0.5652,
|
| 115433 |
+
"step": 16489
|
| 115434 |
+
},
|
| 115435 |
+
{
|
| 115436 |
+
"epoch": 18.78176638176638,
|
| 115437 |
+
"grad_norm": 0.21190428733825684,
|
| 115438 |
+
"learning_rate": 4.96347448596457e-07,
|
| 115439 |
+
"loss": 0.5751,
|
| 115440 |
+
"step": 16490
|
| 115441 |
+
},
|
| 115442 |
+
{
|
| 115443 |
+
"epoch": 18.782905982905984,
|
| 115444 |
+
"grad_norm": 0.1887473165988922,
|
| 115445 |
+
"learning_rate": 4.954240674477501e-07,
|
| 115446 |
+
"loss": 0.8227,
|
| 115447 |
+
"step": 16491
|
| 115448 |
+
},
|
| 115449 |
+
{
|
| 115450 |
+
"epoch": 18.784045584045582,
|
| 115451 |
+
"grad_norm": 0.15295910835266113,
|
| 115452 |
+
"learning_rate": 4.945015374103551e-07,
|
| 115453 |
+
"loss": 0.7139,
|
| 115454 |
+
"step": 16492
|
| 115455 |
+
},
|
| 115456 |
+
{
|
| 115457 |
+
"epoch": 18.785185185185185,
|
| 115458 |
+
"grad_norm": 0.21601083874702454,
|
| 115459 |
+
"learning_rate": 4.935798585163132e-07,
|
| 115460 |
+
"loss": 0.5737,
|
| 115461 |
+
"step": 16493
|
| 115462 |
+
},
|
| 115463 |
+
{
|
| 115464 |
+
"epoch": 18.786324786324787,
|
| 115465 |
+
"grad_norm": 0.20243585109710693,
|
| 115466 |
+
"learning_rate": 4.926590307976347e-07,
|
| 115467 |
+
"loss": 0.5227,
|
| 115468 |
+
"step": 16494
|
| 115469 |
+
},
|
| 115470 |
+
{
|
| 115471 |
+
"epoch": 18.787464387464386,
|
| 115472 |
+
"grad_norm": 0.19312019646167755,
|
| 115473 |
+
"learning_rate": 4.917390542863026e-07,
|
| 115474 |
+
"loss": 0.5336,
|
| 115475 |
+
"step": 16495
|
| 115476 |
+
},
|
| 115477 |
+
{
|
| 115478 |
+
"epoch": 18.788603988603988,
|
| 115479 |
+
"grad_norm": 0.19280482828617096,
|
| 115480 |
+
"learning_rate": 4.908199290142718e-07,
|
| 115481 |
+
"loss": 0.6827,
|
| 115482 |
+
"step": 16496
|
| 115483 |
+
},
|
| 115484 |
+
{
|
| 115485 |
+
"epoch": 18.78974358974359,
|
| 115486 |
+
"grad_norm": 0.17543920874595642,
|
| 115487 |
+
"learning_rate": 4.899016550134638e-07,
|
| 115488 |
+
"loss": 0.8294,
|
| 115489 |
+
"step": 16497
|
| 115490 |
+
},
|
| 115491 |
+
{
|
| 115492 |
+
"epoch": 18.79088319088319,
|
| 115493 |
+
"grad_norm": 0.18286250531673431,
|
| 115494 |
+
"learning_rate": 4.889842323157757e-07,
|
| 115495 |
+
"loss": 0.763,
|
| 115496 |
+
"step": 16498
|
| 115497 |
+
},
|
| 115498 |
+
{
|
| 115499 |
+
"epoch": 18.79202279202279,
|
| 115500 |
+
"grad_norm": 0.18195152282714844,
|
| 115501 |
+
"learning_rate": 4.880676609530704e-07,
|
| 115502 |
+
"loss": 0.7088,
|
| 115503 |
+
"step": 16499
|
| 115504 |
+
},
|
| 115505 |
+
{
|
| 115506 |
+
"epoch": 18.793162393162394,
|
| 115507 |
+
"grad_norm": 0.1589564085006714,
|
| 115508 |
+
"learning_rate": 4.871519409571867e-07,
|
| 115509 |
+
"loss": 0.839,
|
| 115510 |
+
"step": 16500
|
| 115511 |
}
|
| 115512 |
],
|
| 115513 |
"logging_steps": 1,
|
| 115527 |
"attributes": {}
|
| 115528 |
}
|
| 115529 |
},
|
| 115530 |
+
"total_flos": 9.225596333162594e+19,
|
| 115531 |
"train_batch_size": 8,
|
| 115532 |
"trial_name": null,
|
| 115533 |
"trial_params": null
|
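
Note: the records appended in this diff follow the per-step logging schema that the Hugging Face Trainer writes into trainer_state.json (fields epoch, grad_norm, learning_rate, loss, step, normally collected under a log_history list, with summary fields such as total_flos and train_batch_size at the top level). Below is a minimal sketch of how the tail of this state could be inspected offline; it assumes the checkpoint directory is named last-checkpoint and that the standard key names are present, and the path and printed fields are illustrative rather than part of the checkpoint itself.

import json

# Illustrative path: adjust to wherever the checkpoint was saved.
STATE_PATH = "last-checkpoint/trainer_state.json"

with open(STATE_PATH) as f:
    state = json.load(f)

# Per-step records (epoch, grad_norm, learning_rate, loss, step) are
# appended to the log_history list as training progresses.
history = state.get("log_history", [])
last = history[-1] if history else {}

print("global_step :", state.get("global_step"))
print("epoch       :", state.get("epoch"))
print("batch size  :", state.get("train_batch_size"))
print("total FLOs  :", state.get("total_flos"))
print("last record :", {k: last.get(k) for k in ("step", "loss", "learning_rate", "grad_norm")})

# Rough smoothed loss over the most recent 100 logged records.
recent = [r["loss"] for r in history[-100:] if "loss" in r]
if recent:
    print("mean loss (last %d logs): %.4f" % (len(recent), sum(recent) / len(recent)))

Since logging_steps is 1 in this state, every optimizer step contributes one record, so averaging the last 100 entries amounts to a 100-step moving average of the training loss.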