Training in progress, step 16800, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 340808816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf4e06fa8351faf42f0efb6c4c3ae56ed22584343029eae68e2349a1b3670257
|
| 3 |
size 340808816
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 173247691
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5470e5c9adbf0a6e26a4c9476e5f2020f108cf30d9632cc677fde231098cca92
|
| 3 |
size 173247691
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed3753ab7977739b8eda494dd72defae5750f7283141b11a8f562160ba4c1a23
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cde479ebcff72693a7edf070459b3202e37dfc2b1bc2f0e5914213a2e8a60474
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -115508,6 +115508,2106 @@
|
|
| 115508 |
"learning_rate": 4.871519409571867e-07,
|
| 115509 |
"loss": 0.839,
|
| 115510 |
"step": 16500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115511 |
}
|
| 115512 |
],
|
| 115513 |
"logging_steps": 1,
|
|
@@ -115527,7 +117627,7 @@
|
|
| 115527 |
"attributes": {}
|
| 115528 |
}
|
| 115529 |
},
|
| 115530 |
-
"total_flos": 9.
|
| 115531 |
"train_batch_size": 8,
|
| 115532 |
"trial_name": null,
|
| 115533 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 19.134472934472935,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 16800,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 115508 |
"learning_rate": 4.871519409571867e-07,
|
| 115509 |
"loss": 0.839,
|
| 115510 |
"step": 16500
|
| 115511 |
+
},
|
| 115512 |
+
{
|
| 115513 |
+
"epoch": 18.794301994301993,
|
| 115514 |
+
"grad_norm": 0.15771450102329254,
|
| 115515 |
+
"learning_rate": 4.862370723599214e-07,
|
| 115516 |
+
"loss": 0.7269,
|
| 115517 |
+
"step": 16501
|
| 115518 |
+
},
|
| 115519 |
+
{
|
| 115520 |
+
"epoch": 18.795441595441595,
|
| 115521 |
+
"grad_norm": 0.19341163337230682,
|
| 115522 |
+
"learning_rate": 4.853230551930599e-07,
|
| 115523 |
+
"loss": 0.8105,
|
| 115524 |
+
"step": 16502
|
| 115525 |
+
},
|
| 115526 |
+
{
|
| 115527 |
+
"epoch": 18.796581196581197,
|
| 115528 |
+
"grad_norm": 0.2219672054052353,
|
| 115529 |
+
"learning_rate": 4.844098894883409e-07,
|
| 115530 |
+
"loss": 0.3761,
|
| 115531 |
+
"step": 16503
|
| 115532 |
+
},
|
| 115533 |
+
{
|
| 115534 |
+
"epoch": 18.797720797720796,
|
| 115535 |
+
"grad_norm": 0.17847643792629242,
|
| 115536 |
+
"learning_rate": 4.834975752774889e-07,
|
| 115537 |
+
"loss": 0.4626,
|
| 115538 |
+
"step": 16504
|
| 115539 |
+
},
|
| 115540 |
+
{
|
| 115541 |
+
"epoch": 18.7988603988604,
|
| 115542 |
+
"grad_norm": 0.18879228830337524,
|
| 115543 |
+
"learning_rate": 4.825861125921816e-07,
|
| 115544 |
+
"loss": 0.6161,
|
| 115545 |
+
"step": 16505
|
| 115546 |
+
},
|
| 115547 |
+
{
|
| 115548 |
+
"epoch": 18.8,
|
| 115549 |
+
"grad_norm": 0.2137671262025833,
|
| 115550 |
+
"learning_rate": 4.816755014640851e-07,
|
| 115551 |
+
"loss": 0.6276,
|
| 115552 |
+
"step": 16506
|
| 115553 |
+
},
|
| 115554 |
+
{
|
| 115555 |
+
"epoch": 18.8011396011396,
|
| 115556 |
+
"grad_norm": 0.19195720553398132,
|
| 115557 |
+
"learning_rate": 4.807657419248212e-07,
|
| 115558 |
+
"loss": 0.786,
|
| 115559 |
+
"step": 16507
|
| 115560 |
+
},
|
| 115561 |
+
{
|
| 115562 |
+
"epoch": 18.802279202279202,
|
| 115563 |
+
"grad_norm": 0.19901008903980255,
|
| 115564 |
+
"learning_rate": 4.798568340059928e-07,
|
| 115565 |
+
"loss": 0.5904,
|
| 115566 |
+
"step": 16508
|
| 115567 |
+
},
|
| 115568 |
+
{
|
| 115569 |
+
"epoch": 18.803418803418804,
|
| 115570 |
+
"grad_norm": 0.17620739340782166,
|
| 115571 |
+
"learning_rate": 4.789487777391688e-07,
|
| 115572 |
+
"loss": 0.7698,
|
| 115573 |
+
"step": 16509
|
| 115574 |
+
},
|
| 115575 |
+
{
|
| 115576 |
+
"epoch": 18.804558404558403,
|
| 115577 |
+
"grad_norm": 0.23002579808235168,
|
| 115578 |
+
"learning_rate": 4.780415731558851e-07,
|
| 115579 |
+
"loss": 0.5707,
|
| 115580 |
+
"step": 16510
|
| 115581 |
+
},
|
| 115582 |
+
{
|
| 115583 |
+
"epoch": 18.805698005698005,
|
| 115584 |
+
"grad_norm": 0.1925451159477234,
|
| 115585 |
+
"learning_rate": 4.771352202876528e-07,
|
| 115586 |
+
"loss": 0.579,
|
| 115587 |
+
"step": 16511
|
| 115588 |
+
},
|
| 115589 |
+
{
|
| 115590 |
+
"epoch": 18.806837606837608,
|
| 115591 |
+
"grad_norm": 0.17723438143730164,
|
| 115592 |
+
"learning_rate": 4.7622971916595483e-07,
|
| 115593 |
+
"loss": 0.8273,
|
| 115594 |
+
"step": 16512
|
| 115595 |
+
},
|
| 115596 |
+
{
|
| 115597 |
+
"epoch": 18.807977207977206,
|
| 115598 |
+
"grad_norm": 0.21860025823116302,
|
| 115599 |
+
"learning_rate": 4.7532506982223835e-07,
|
| 115600 |
+
"loss": 0.7466,
|
| 115601 |
+
"step": 16513
|
| 115602 |
+
},
|
| 115603 |
+
{
|
| 115604 |
+
"epoch": 18.80911680911681,
|
| 115605 |
+
"grad_norm": 0.19728177785873413,
|
| 115606 |
+
"learning_rate": 4.7442127228792264e-07,
|
| 115607 |
+
"loss": 0.7042,
|
| 115608 |
+
"step": 16514
|
| 115609 |
+
},
|
| 115610 |
+
{
|
| 115611 |
+
"epoch": 18.81025641025641,
|
| 115612 |
+
"grad_norm": 0.17997390031814575,
|
| 115613 |
+
"learning_rate": 4.735183265944049e-07,
|
| 115614 |
+
"loss": 0.5867,
|
| 115615 |
+
"step": 16515
|
| 115616 |
+
},
|
| 115617 |
+
{
|
| 115618 |
+
"epoch": 18.81139601139601,
|
| 115619 |
+
"grad_norm": 0.22678206861019135,
|
| 115620 |
+
"learning_rate": 4.7261623277304324e-07,
|
| 115621 |
+
"loss": 0.6038,
|
| 115622 |
+
"step": 16516
|
| 115623 |
+
},
|
| 115624 |
+
{
|
| 115625 |
+
"epoch": 18.812535612535612,
|
| 115626 |
+
"grad_norm": 0.20823009312152863,
|
| 115627 |
+
"learning_rate": 4.71714990855171e-07,
|
| 115628 |
+
"loss": 0.7805,
|
| 115629 |
+
"step": 16517
|
| 115630 |
+
},
|
| 115631 |
+
{
|
| 115632 |
+
"epoch": 18.813675213675214,
|
| 115633 |
+
"grad_norm": 0.19987599551677704,
|
| 115634 |
+
"learning_rate": 4.708146008720854e-07,
|
| 115635 |
+
"loss": 0.5464,
|
| 115636 |
+
"step": 16518
|
| 115637 |
+
},
|
| 115638 |
+
{
|
| 115639 |
+
"epoch": 18.814814814814813,
|
| 115640 |
+
"grad_norm": 0.1835150420665741,
|
| 115641 |
+
"learning_rate": 4.699150628550697e-07,
|
| 115642 |
+
"loss": 0.7399,
|
| 115643 |
+
"step": 16519
|
| 115644 |
+
},
|
| 115645 |
+
{
|
| 115646 |
+
"epoch": 18.815954415954415,
|
| 115647 |
+
"grad_norm": 0.19724053144454956,
|
| 115648 |
+
"learning_rate": 4.6901637683536e-07,
|
| 115649 |
+
"loss": 0.5585,
|
| 115650 |
+
"step": 16520
|
| 115651 |
+
},
|
| 115652 |
+
{
|
| 115653 |
+
"epoch": 18.817094017094018,
|
| 115654 |
+
"grad_norm": 0.16801817715168,
|
| 115655 |
+
"learning_rate": 4.681185428441731e-07,
|
| 115656 |
+
"loss": 0.4347,
|
| 115657 |
+
"step": 16521
|
| 115658 |
+
},
|
| 115659 |
+
{
|
| 115660 |
+
"epoch": 18.81823361823362,
|
| 115661 |
+
"grad_norm": 0.18089032173156738,
|
| 115662 |
+
"learning_rate": 4.6722156091268956e-07,
|
| 115663 |
+
"loss": 0.8058,
|
| 115664 |
+
"step": 16522
|
| 115665 |
+
},
|
| 115666 |
+
{
|
| 115667 |
+
"epoch": 18.81937321937322,
|
| 115668 |
+
"grad_norm": 0.19650566577911377,
|
| 115669 |
+
"learning_rate": 4.663254310720705e-07,
|
| 115670 |
+
"loss": 0.6817,
|
| 115671 |
+
"step": 16523
|
| 115672 |
+
},
|
| 115673 |
+
{
|
| 115674 |
+
"epoch": 18.82051282051282,
|
| 115675 |
+
"grad_norm": 0.20789745450019836,
|
| 115676 |
+
"learning_rate": 4.6543015335343554e-07,
|
| 115677 |
+
"loss": 0.6766,
|
| 115678 |
+
"step": 16524
|
| 115679 |
+
},
|
| 115680 |
+
{
|
| 115681 |
+
"epoch": 18.821652421652423,
|
| 115682 |
+
"grad_norm": 0.21882633864879608,
|
| 115683 |
+
"learning_rate": 4.6453572778787925e-07,
|
| 115684 |
+
"loss": 0.8038,
|
| 115685 |
+
"step": 16525
|
| 115686 |
+
},
|
| 115687 |
+
{
|
| 115688 |
+
"epoch": 18.822792022792022,
|
| 115689 |
+
"grad_norm": 0.1772392988204956,
|
| 115690 |
+
"learning_rate": 4.6364215440647117e-07,
|
| 115691 |
+
"loss": 0.6001,
|
| 115692 |
+
"step": 16526
|
| 115693 |
+
},
|
| 115694 |
+
{
|
| 115695 |
+
"epoch": 18.823931623931625,
|
| 115696 |
+
"grad_norm": 0.26594915986061096,
|
| 115697 |
+
"learning_rate": 4.6274943324025046e-07,
|
| 115698 |
+
"loss": 0.5209,
|
| 115699 |
+
"step": 16527
|
| 115700 |
+
},
|
| 115701 |
+
{
|
| 115702 |
+
"epoch": 18.825071225071227,
|
| 115703 |
+
"grad_norm": 0.22183556854724884,
|
| 115704 |
+
"learning_rate": 4.6185756432021444e-07,
|
| 115705 |
+
"loss": 0.5135,
|
| 115706 |
+
"step": 16528
|
| 115707 |
+
},
|
| 115708 |
+
{
|
| 115709 |
+
"epoch": 18.826210826210826,
|
| 115710 |
+
"grad_norm": 0.17441076040267944,
|
| 115711 |
+
"learning_rate": 4.609665476773439e-07,
|
| 115712 |
+
"loss": 0.6903,
|
| 115713 |
+
"step": 16529
|
| 115714 |
+
},
|
| 115715 |
+
{
|
| 115716 |
+
"epoch": 18.827350427350428,
|
| 115717 |
+
"grad_norm": 0.16808576881885529,
|
| 115718 |
+
"learning_rate": 4.6007638334259193e-07,
|
| 115719 |
+
"loss": 0.5876,
|
| 115720 |
+
"step": 16530
|
| 115721 |
+
},
|
| 115722 |
+
{
|
| 115723 |
+
"epoch": 18.82849002849003,
|
| 115724 |
+
"grad_norm": 0.17738579213619232,
|
| 115725 |
+
"learning_rate": 4.591870713468727e-07,
|
| 115726 |
+
"loss": 0.6782,
|
| 115727 |
+
"step": 16531
|
| 115728 |
+
},
|
| 115729 |
+
{
|
| 115730 |
+
"epoch": 18.82962962962963,
|
| 115731 |
+
"grad_norm": 0.21790333092212677,
|
| 115732 |
+
"learning_rate": 4.582986117210697e-07,
|
| 115733 |
+
"loss": 0.5292,
|
| 115734 |
+
"step": 16532
|
| 115735 |
+
},
|
| 115736 |
+
{
|
| 115737 |
+
"epoch": 18.83076923076923,
|
| 115738 |
+
"grad_norm": 0.1882610023021698,
|
| 115739 |
+
"learning_rate": 4.5741100449605003e-07,
|
| 115740 |
+
"loss": 0.8497,
|
| 115741 |
+
"step": 16533
|
| 115742 |
+
},
|
| 115743 |
+
{
|
| 115744 |
+
"epoch": 18.831908831908834,
|
| 115745 |
+
"grad_norm": 0.24303381145000458,
|
| 115746 |
+
"learning_rate": 4.5652424970263895e-07,
|
| 115747 |
+
"loss": 0.5634,
|
| 115748 |
+
"step": 16534
|
| 115749 |
+
},
|
| 115750 |
+
{
|
| 115751 |
+
"epoch": 18.833048433048432,
|
| 115752 |
+
"grad_norm": 0.1699916273355484,
|
| 115753 |
+
"learning_rate": 4.5563834737163137e-07,
|
| 115754 |
+
"loss": 0.7074,
|
| 115755 |
+
"step": 16535
|
| 115756 |
+
},
|
| 115757 |
+
{
|
| 115758 |
+
"epoch": 18.834188034188035,
|
| 115759 |
+
"grad_norm": 0.20706459879875183,
|
| 115760 |
+
"learning_rate": 4.5475329753380256e-07,
|
| 115761 |
+
"loss": 0.6965,
|
| 115762 |
+
"step": 16536
|
| 115763 |
+
},
|
| 115764 |
+
{
|
| 115765 |
+
"epoch": 18.835327635327637,
|
| 115766 |
+
"grad_norm": 0.19681118428707123,
|
| 115767 |
+
"learning_rate": 4.538691002198919e-07,
|
| 115768 |
+
"loss": 0.6697,
|
| 115769 |
+
"step": 16537
|
| 115770 |
+
},
|
| 115771 |
+
{
|
| 115772 |
+
"epoch": 18.836467236467236,
|
| 115773 |
+
"grad_norm": 0.18500886857509613,
|
| 115774 |
+
"learning_rate": 4.529857554606137e-07,
|
| 115775 |
+
"loss": 0.726,
|
| 115776 |
+
"step": 16538
|
| 115777 |
+
},
|
| 115778 |
+
{
|
| 115779 |
+
"epoch": 18.837606837606838,
|
| 115780 |
+
"grad_norm": 0.20500552654266357,
|
| 115781 |
+
"learning_rate": 4.5210326328664067e-07,
|
| 115782 |
+
"loss": 0.6376,
|
| 115783 |
+
"step": 16539
|
| 115784 |
+
},
|
| 115785 |
+
{
|
| 115786 |
+
"epoch": 18.83874643874644,
|
| 115787 |
+
"grad_norm": 0.20892156660556793,
|
| 115788 |
+
"learning_rate": 4.51221623728626e-07,
|
| 115789 |
+
"loss": 0.7291,
|
| 115790 |
+
"step": 16540
|
| 115791 |
+
},
|
| 115792 |
+
{
|
| 115793 |
+
"epoch": 18.83988603988604,
|
| 115794 |
+
"grad_norm": 0.18950065970420837,
|
| 115795 |
+
"learning_rate": 4.5034083681719797e-07,
|
| 115796 |
+
"loss": 0.6232,
|
| 115797 |
+
"step": 16541
|
| 115798 |
+
},
|
| 115799 |
+
{
|
| 115800 |
+
"epoch": 18.84102564102564,
|
| 115801 |
+
"grad_norm": 0.16415338218212128,
|
| 115802 |
+
"learning_rate": 4.494609025829405e-07,
|
| 115803 |
+
"loss": 0.6148,
|
| 115804 |
+
"step": 16542
|
| 115805 |
+
},
|
| 115806 |
+
{
|
| 115807 |
+
"epoch": 18.842165242165244,
|
| 115808 |
+
"grad_norm": 0.24179856479167938,
|
| 115809 |
+
"learning_rate": 4.4858182105642356e-07,
|
| 115810 |
+
"loss": 0.7007,
|
| 115811 |
+
"step": 16543
|
| 115812 |
+
},
|
| 115813 |
+
{
|
| 115814 |
+
"epoch": 18.843304843304843,
|
| 115815 |
+
"grad_norm": 0.18492870032787323,
|
| 115816 |
+
"learning_rate": 4.477035922681755e-07,
|
| 115817 |
+
"loss": 0.5736,
|
| 115818 |
+
"step": 16544
|
| 115819 |
+
},
|
| 115820 |
+
{
|
| 115821 |
+
"epoch": 18.844444444444445,
|
| 115822 |
+
"grad_norm": 0.15735740959644318,
|
| 115823 |
+
"learning_rate": 4.468262162486997e-07,
|
| 115824 |
+
"loss": 0.7419,
|
| 115825 |
+
"step": 16545
|
| 115826 |
+
},
|
| 115827 |
+
{
|
| 115828 |
+
"epoch": 18.845584045584047,
|
| 115829 |
+
"grad_norm": 0.17746874690055847,
|
| 115830 |
+
"learning_rate": 4.4594969302847454e-07,
|
| 115831 |
+
"loss": 0.7512,
|
| 115832 |
+
"step": 16546
|
| 115833 |
+
},
|
| 115834 |
+
{
|
| 115835 |
+
"epoch": 18.846723646723646,
|
| 115836 |
+
"grad_norm": 0.2393515706062317,
|
| 115837 |
+
"learning_rate": 4.4507402263793686e-07,
|
| 115838 |
+
"loss": 0.7666,
|
| 115839 |
+
"step": 16547
|
| 115840 |
+
},
|
| 115841 |
+
{
|
| 115842 |
+
"epoch": 18.84786324786325,
|
| 115843 |
+
"grad_norm": 0.1816677302122116,
|
| 115844 |
+
"learning_rate": 4.441992051075039e-07,
|
| 115845 |
+
"loss": 0.8176,
|
| 115846 |
+
"step": 16548
|
| 115847 |
+
},
|
| 115848 |
+
{
|
| 115849 |
+
"epoch": 18.84900284900285,
|
| 115850 |
+
"grad_norm": 0.21298880875110626,
|
| 115851 |
+
"learning_rate": 4.4332524046756254e-07,
|
| 115852 |
+
"loss": 0.5793,
|
| 115853 |
+
"step": 16549
|
| 115854 |
+
},
|
| 115855 |
+
{
|
| 115856 |
+
"epoch": 18.85014245014245,
|
| 115857 |
+
"grad_norm": 0.1775524765253067,
|
| 115858 |
+
"learning_rate": 4.4245212874846895e-07,
|
| 115859 |
+
"loss": 0.7174,
|
| 115860 |
+
"step": 16550
|
| 115861 |
+
},
|
| 115862 |
+
{
|
| 115863 |
+
"epoch": 18.851282051282052,
|
| 115864 |
+
"grad_norm": 0.20027805864810944,
|
| 115865 |
+
"learning_rate": 4.4157986998054346e-07,
|
| 115866 |
+
"loss": 0.6773,
|
| 115867 |
+
"step": 16551
|
| 115868 |
+
},
|
| 115869 |
+
{
|
| 115870 |
+
"epoch": 18.852421652421654,
|
| 115871 |
+
"grad_norm": 0.18412970006465912,
|
| 115872 |
+
"learning_rate": 4.407084641940867e-07,
|
| 115873 |
+
"loss": 0.6431,
|
| 115874 |
+
"step": 16552
|
| 115875 |
+
},
|
| 115876 |
+
{
|
| 115877 |
+
"epoch": 18.853561253561253,
|
| 115878 |
+
"grad_norm": 0.19605478644371033,
|
| 115879 |
+
"learning_rate": 4.3983791141936346e-07,
|
| 115880 |
+
"loss": 0.4805,
|
| 115881 |
+
"step": 16553
|
| 115882 |
+
},
|
| 115883 |
+
{
|
| 115884 |
+
"epoch": 18.854700854700855,
|
| 115885 |
+
"grad_norm": 0.18446141481399536,
|
| 115886 |
+
"learning_rate": 4.3896821168660786e-07,
|
| 115887 |
+
"loss": 0.6969,
|
| 115888 |
+
"step": 16554
|
| 115889 |
+
},
|
| 115890 |
+
{
|
| 115891 |
+
"epoch": 18.855840455840458,
|
| 115892 |
+
"grad_norm": 0.23959897458553314,
|
| 115893 |
+
"learning_rate": 4.3809936502603177e-07,
|
| 115894 |
+
"loss": 0.6129,
|
| 115895 |
+
"step": 16555
|
| 115896 |
+
},
|
| 115897 |
+
{
|
| 115898 |
+
"epoch": 18.856980056980056,
|
| 115899 |
+
"grad_norm": 0.2176634520292282,
|
| 115900 |
+
"learning_rate": 4.3723137146780836e-07,
|
| 115901 |
+
"loss": 0.7465,
|
| 115902 |
+
"step": 16556
|
| 115903 |
+
},
|
| 115904 |
+
{
|
| 115905 |
+
"epoch": 18.85811965811966,
|
| 115906 |
+
"grad_norm": 0.25287431478500366,
|
| 115907 |
+
"learning_rate": 4.3636423104209126e-07,
|
| 115908 |
+
"loss": 0.517,
|
| 115909 |
+
"step": 16557
|
| 115910 |
+
},
|
| 115911 |
+
{
|
| 115912 |
+
"epoch": 18.85925925925926,
|
| 115913 |
+
"grad_norm": 0.2089501917362213,
|
| 115914 |
+
"learning_rate": 4.3549794377899244e-07,
|
| 115915 |
+
"loss": 0.6454,
|
| 115916 |
+
"step": 16558
|
| 115917 |
+
},
|
| 115918 |
+
{
|
| 115919 |
+
"epoch": 18.86039886039886,
|
| 115920 |
+
"grad_norm": 0.15604382753372192,
|
| 115921 |
+
"learning_rate": 4.346325097086018e-07,
|
| 115922 |
+
"loss": 0.7389,
|
| 115923 |
+
"step": 16559
|
| 115924 |
+
},
|
| 115925 |
+
{
|
| 115926 |
+
"epoch": 18.861538461538462,
|
| 115927 |
+
"grad_norm": 0.1590043604373932,
|
| 115928 |
+
"learning_rate": 4.337679288609786e-07,
|
| 115929 |
+
"loss": 0.5481,
|
| 115930 |
+
"step": 16560
|
| 115931 |
+
},
|
| 115932 |
+
{
|
| 115933 |
+
"epoch": 18.862678062678064,
|
| 115934 |
+
"grad_norm": 0.18959836661815643,
|
| 115935 |
+
"learning_rate": 4.329042012661544e-07,
|
| 115936 |
+
"loss": 0.823,
|
| 115937 |
+
"step": 16561
|
| 115938 |
+
},
|
| 115939 |
+
{
|
| 115940 |
+
"epoch": 18.863817663817663,
|
| 115941 |
+
"grad_norm": 0.19817541539669037,
|
| 115942 |
+
"learning_rate": 4.320413269541246e-07,
|
| 115943 |
+
"loss": 0.7384,
|
| 115944 |
+
"step": 16562
|
| 115945 |
+
},
|
| 115946 |
+
{
|
| 115947 |
+
"epoch": 18.864957264957265,
|
| 115948 |
+
"grad_norm": 0.21355056762695312,
|
| 115949 |
+
"learning_rate": 4.3117930595486246e-07,
|
| 115950 |
+
"loss": 0.441,
|
| 115951 |
+
"step": 16563
|
| 115952 |
+
},
|
| 115953 |
+
{
|
| 115954 |
+
"epoch": 18.866096866096868,
|
| 115955 |
+
"grad_norm": 0.25660574436187744,
|
| 115956 |
+
"learning_rate": 4.303181382983107e-07,
|
| 115957 |
+
"loss": 0.5913,
|
| 115958 |
+
"step": 16564
|
| 115959 |
+
},
|
| 115960 |
+
{
|
| 115961 |
+
"epoch": 18.867236467236467,
|
| 115962 |
+
"grad_norm": 0.19600361585617065,
|
| 115963 |
+
"learning_rate": 4.2945782401437317e-07,
|
| 115964 |
+
"loss": 0.6876,
|
| 115965 |
+
"step": 16565
|
| 115966 |
+
},
|
| 115967 |
+
{
|
| 115968 |
+
"epoch": 18.86837606837607,
|
| 115969 |
+
"grad_norm": 0.23927044868469238,
|
| 115970 |
+
"learning_rate": 4.285983631329371e-07,
|
| 115971 |
+
"loss": 0.5857,
|
| 115972 |
+
"step": 16566
|
| 115973 |
+
},
|
| 115974 |
+
{
|
| 115975 |
+
"epoch": 18.86951566951567,
|
| 115976 |
+
"grad_norm": 0.22985951602458954,
|
| 115977 |
+
"learning_rate": 4.2773975568384803e-07,
|
| 115978 |
+
"loss": 0.5603,
|
| 115979 |
+
"step": 16567
|
| 115980 |
+
},
|
| 115981 |
+
{
|
| 115982 |
+
"epoch": 18.87065527065527,
|
| 115983 |
+
"grad_norm": 0.15287210047245026,
|
| 115984 |
+
"learning_rate": 4.268820016969349e-07,
|
| 115985 |
+
"loss": 0.6285,
|
| 115986 |
+
"step": 16568
|
| 115987 |
+
},
|
| 115988 |
+
{
|
| 115989 |
+
"epoch": 18.871794871794872,
|
| 115990 |
+
"grad_norm": 0.19072280824184418,
|
| 115991 |
+
"learning_rate": 4.2602510120198504e-07,
|
| 115992 |
+
"loss": 0.6754,
|
| 115993 |
+
"step": 16569
|
| 115994 |
+
},
|
| 115995 |
+
{
|
| 115996 |
+
"epoch": 18.872934472934475,
|
| 115997 |
+
"grad_norm": 0.16646698117256165,
|
| 115998 |
+
"learning_rate": 4.2516905422876343e-07,
|
| 115999 |
+
"loss": 0.5203,
|
| 116000 |
+
"step": 16570
|
| 116001 |
+
},
|
| 116002 |
+
{
|
| 116003 |
+
"epoch": 18.874074074074073,
|
| 116004 |
+
"grad_norm": 0.20522910356521606,
|
| 116005 |
+
"learning_rate": 4.243138608069991e-07,
|
| 116006 |
+
"loss": 0.639,
|
| 116007 |
+
"step": 16571
|
| 116008 |
+
},
|
| 116009 |
+
{
|
| 116010 |
+
"epoch": 18.875213675213676,
|
| 116011 |
+
"grad_norm": 0.22436067461967468,
|
| 116012 |
+
"learning_rate": 4.2345952096639886e-07,
|
| 116013 |
+
"loss": 0.584,
|
| 116014 |
+
"step": 16572
|
| 116015 |
+
},
|
| 116016 |
+
{
|
| 116017 |
+
"epoch": 18.876353276353278,
|
| 116018 |
+
"grad_norm": 0.18598589301109314,
|
| 116019 |
+
"learning_rate": 4.226060347366334e-07,
|
| 116020 |
+
"loss": 0.4548,
|
| 116021 |
+
"step": 16573
|
| 116022 |
+
},
|
| 116023 |
+
{
|
| 116024 |
+
"epoch": 18.877492877492877,
|
| 116025 |
+
"grad_norm": 0.19741828739643097,
|
| 116026 |
+
"learning_rate": 4.2175340214735126e-07,
|
| 116027 |
+
"loss": 0.614,
|
| 116028 |
+
"step": 16574
|
| 116029 |
+
},
|
| 116030 |
+
{
|
| 116031 |
+
"epoch": 18.87863247863248,
|
| 116032 |
+
"grad_norm": 0.1856069564819336,
|
| 116033 |
+
"learning_rate": 4.2090162322816487e-07,
|
| 116034 |
+
"loss": 0.5637,
|
| 116035 |
+
"step": 16575
|
| 116036 |
+
},
|
| 116037 |
+
{
|
| 116038 |
+
"epoch": 18.87977207977208,
|
| 116039 |
+
"grad_norm": 0.16308023035526276,
|
| 116040 |
+
"learning_rate": 4.200506980086533e-07,
|
| 116041 |
+
"loss": 0.6801,
|
| 116042 |
+
"step": 16576
|
| 116043 |
+
},
|
| 116044 |
+
{
|
| 116045 |
+
"epoch": 18.88091168091168,
|
| 116046 |
+
"grad_norm": 0.16635264456272125,
|
| 116047 |
+
"learning_rate": 4.19200626518379e-07,
|
| 116048 |
+
"loss": 0.7788,
|
| 116049 |
+
"step": 16577
|
| 116050 |
+
},
|
| 116051 |
+
{
|
| 116052 |
+
"epoch": 18.882051282051282,
|
| 116053 |
+
"grad_norm": 0.22018620371818542,
|
| 116054 |
+
"learning_rate": 4.1835140878686286e-07,
|
| 116055 |
+
"loss": 0.5671,
|
| 116056 |
+
"step": 16578
|
| 116057 |
+
},
|
| 116058 |
+
{
|
| 116059 |
+
"epoch": 18.883190883190885,
|
| 116060 |
+
"grad_norm": 0.17615817487239838,
|
| 116061 |
+
"learning_rate": 4.175030448436062e-07,
|
| 116062 |
+
"loss": 0.6738,
|
| 116063 |
+
"step": 16579
|
| 116064 |
+
},
|
| 116065 |
+
{
|
| 116066 |
+
"epoch": 18.884330484330484,
|
| 116067 |
+
"grad_norm": 0.2155941277742386,
|
| 116068 |
+
"learning_rate": 4.1665553471806617e-07,
|
| 116069 |
+
"loss": 0.5148,
|
| 116070 |
+
"step": 16580
|
| 116071 |
+
},
|
| 116072 |
+
{
|
| 116073 |
+
"epoch": 18.885470085470086,
|
| 116074 |
+
"grad_norm": 0.2144429236650467,
|
| 116075 |
+
"learning_rate": 4.158088784396857e-07,
|
| 116076 |
+
"loss": 0.7315,
|
| 116077 |
+
"step": 16581
|
| 116078 |
+
},
|
| 116079 |
+
{
|
| 116080 |
+
"epoch": 18.886609686609688,
|
| 116081 |
+
"grad_norm": 0.2490888237953186,
|
| 116082 |
+
"learning_rate": 4.149630760378692e-07,
|
| 116083 |
+
"loss": 0.3359,
|
| 116084 |
+
"step": 16582
|
| 116085 |
+
},
|
| 116086 |
+
{
|
| 116087 |
+
"epoch": 18.887749287749287,
|
| 116088 |
+
"grad_norm": 0.20041395723819733,
|
| 116089 |
+
"learning_rate": 4.1411812754199864e-07,
|
| 116090 |
+
"loss": 0.6532,
|
| 116091 |
+
"step": 16583
|
| 116092 |
+
},
|
| 116093 |
+
{
|
| 116094 |
+
"epoch": 18.88888888888889,
|
| 116095 |
+
"grad_norm": 0.21015304327011108,
|
| 116096 |
+
"learning_rate": 4.1327403298140886e-07,
|
| 116097 |
+
"loss": 0.6025,
|
| 116098 |
+
"step": 16584
|
| 116099 |
+
},
|
| 116100 |
+
{
|
| 116101 |
+
"epoch": 18.89002849002849,
|
| 116102 |
+
"grad_norm": 0.2071191966533661,
|
| 116103 |
+
"learning_rate": 4.124307923854293e-07,
|
| 116104 |
+
"loss": 0.5838,
|
| 116105 |
+
"step": 16585
|
| 116106 |
+
},
|
| 116107 |
+
{
|
| 116108 |
+
"epoch": 18.89116809116809,
|
| 116109 |
+
"grad_norm": 0.20768992602825165,
|
| 116110 |
+
"learning_rate": 4.115884057833447e-07,
|
| 116111 |
+
"loss": 0.3889,
|
| 116112 |
+
"step": 16586
|
| 116113 |
+
},
|
| 116114 |
+
{
|
| 116115 |
+
"epoch": 18.892307692307693,
|
| 116116 |
+
"grad_norm": 0.17783516645431519,
|
| 116117 |
+
"learning_rate": 4.107468732044151e-07,
|
| 116118 |
+
"loss": 0.5076,
|
| 116119 |
+
"step": 16587
|
| 116120 |
+
},
|
| 116121 |
+
{
|
| 116122 |
+
"epoch": 18.893447293447295,
|
| 116123 |
+
"grad_norm": 0.20266379415988922,
|
| 116124 |
+
"learning_rate": 4.099061946778643e-07,
|
| 116125 |
+
"loss": 0.5666,
|
| 116126 |
+
"step": 16588
|
| 116127 |
+
},
|
| 116128 |
+
{
|
| 116129 |
+
"epoch": 18.894586894586894,
|
| 116130 |
+
"grad_norm": 0.1698126494884491,
|
| 116131 |
+
"learning_rate": 4.0906637023289675e-07,
|
| 116132 |
+
"loss": 0.8561,
|
| 116133 |
+
"step": 16589
|
| 116134 |
+
},
|
| 116135 |
+
{
|
| 116136 |
+
"epoch": 18.895726495726496,
|
| 116137 |
+
"grad_norm": 0.30874332785606384,
|
| 116138 |
+
"learning_rate": 4.0822739989867796e-07,
|
| 116139 |
+
"loss": 0.8681,
|
| 116140 |
+
"step": 16590
|
| 116141 |
+
},
|
| 116142 |
+
{
|
| 116143 |
+
"epoch": 18.8968660968661,
|
| 116144 |
+
"grad_norm": 0.21939298510551453,
|
| 116145 |
+
"learning_rate": 4.0738928370435135e-07,
|
| 116146 |
+
"loss": 0.5728,
|
| 116147 |
+
"step": 16591
|
| 116148 |
+
},
|
| 116149 |
+
{
|
| 116150 |
+
"epoch": 18.898005698005697,
|
| 116151 |
+
"grad_norm": 0.23421180248260498,
|
| 116152 |
+
"learning_rate": 4.065520216790214e-07,
|
| 116153 |
+
"loss": 0.5417,
|
| 116154 |
+
"step": 16592
|
| 116155 |
+
},
|
| 116156 |
+
{
|
| 116157 |
+
"epoch": 18.8991452991453,
|
| 116158 |
+
"grad_norm": 0.1682051122188568,
|
| 116159 |
+
"learning_rate": 4.0571561385177313e-07,
|
| 116160 |
+
"loss": 0.7409,
|
| 116161 |
+
"step": 16593
|
| 116162 |
+
},
|
| 116163 |
+
{
|
| 116164 |
+
"epoch": 18.900284900284902,
|
| 116165 |
+
"grad_norm": 0.2224481999874115,
|
| 116166 |
+
"learning_rate": 4.048800602516584e-07,
|
| 116167 |
+
"loss": 0.622,
|
| 116168 |
+
"step": 16594
|
| 116169 |
+
},
|
| 116170 |
+
{
|
| 116171 |
+
"epoch": 18.9014245014245,
|
| 116172 |
+
"grad_norm": 0.1950470209121704,
|
| 116173 |
+
"learning_rate": 4.0404536090769276e-07,
|
| 116174 |
+
"loss": 0.4211,
|
| 116175 |
+
"step": 16595
|
| 116176 |
+
},
|
| 116177 |
+
{
|
| 116178 |
+
"epoch": 18.902564102564103,
|
| 116179 |
+
"grad_norm": 0.25210875272750854,
|
| 116180 |
+
"learning_rate": 4.032115158488725e-07,
|
| 116181 |
+
"loss": 0.5964,
|
| 116182 |
+
"step": 16596
|
| 116183 |
+
},
|
| 116184 |
+
{
|
| 116185 |
+
"epoch": 18.903703703703705,
|
| 116186 |
+
"grad_norm": 0.20010331273078918,
|
| 116187 |
+
"learning_rate": 4.0237852510415787e-07,
|
| 116188 |
+
"loss": 0.8349,
|
| 116189 |
+
"step": 16597
|
| 116190 |
+
},
|
| 116191 |
+
{
|
| 116192 |
+
"epoch": 18.904843304843304,
|
| 116193 |
+
"grad_norm": 0.1533205658197403,
|
| 116194 |
+
"learning_rate": 4.015463887024812e-07,
|
| 116195 |
+
"loss": 0.5577,
|
| 116196 |
+
"step": 16598
|
| 116197 |
+
},
|
| 116198 |
+
{
|
| 116199 |
+
"epoch": 18.905982905982906,
|
| 116200 |
+
"grad_norm": 0.2169148027896881,
|
| 116201 |
+
"learning_rate": 4.0071510667274436e-07,
|
| 116202 |
+
"loss": 0.6079,
|
| 116203 |
+
"step": 16599
|
| 116204 |
+
},
|
| 116205 |
+
{
|
| 116206 |
+
"epoch": 18.90712250712251,
|
| 116207 |
+
"grad_norm": 0.16232413053512573,
|
| 116208 |
+
"learning_rate": 3.9988467904381875e-07,
|
| 116209 |
+
"loss": 0.7123,
|
| 116210 |
+
"step": 16600
|
| 116211 |
+
},
|
| 116212 |
+
{
|
| 116213 |
+
"epoch": 18.908262108262107,
|
| 116214 |
+
"grad_norm": 0.22531388700008392,
|
| 116215 |
+
"learning_rate": 3.990551058445507e-07,
|
| 116216 |
+
"loss": 0.49,
|
| 116217 |
+
"step": 16601
|
| 116218 |
+
},
|
| 116219 |
+
{
|
| 116220 |
+
"epoch": 18.90940170940171,
|
| 116221 |
+
"grad_norm": 0.19916465878486633,
|
| 116222 |
+
"learning_rate": 3.982263871037506e-07,
|
| 116223 |
+
"loss": 0.6874,
|
| 116224 |
+
"step": 16602
|
| 116225 |
+
},
|
| 116226 |
+
{
|
| 116227 |
+
"epoch": 18.910541310541312,
|
| 116228 |
+
"grad_norm": 0.21139632165431976,
|
| 116229 |
+
"learning_rate": 3.9739852285020364e-07,
|
| 116230 |
+
"loss": 0.7132,
|
| 116231 |
+
"step": 16603
|
| 116232 |
+
},
|
| 116233 |
+
{
|
| 116234 |
+
"epoch": 18.91168091168091,
|
| 116235 |
+
"grad_norm": 0.2011098563671112,
|
| 116236 |
+
"learning_rate": 3.9657151311266183e-07,
|
| 116237 |
+
"loss": 0.4991,
|
| 116238 |
+
"step": 16604
|
| 116239 |
+
},
|
| 116240 |
+
{
|
| 116241 |
+
"epoch": 18.912820512820513,
|
| 116242 |
+
"grad_norm": 0.18525491654872894,
|
| 116243 |
+
"learning_rate": 3.9574535791985224e-07,
|
| 116244 |
+
"loss": 0.8635,
|
| 116245 |
+
"step": 16605
|
| 116246 |
+
},
|
| 116247 |
+
{
|
| 116248 |
+
"epoch": 18.913960113960115,
|
| 116249 |
+
"grad_norm": 0.1907753348350525,
|
| 116250 |
+
"learning_rate": 3.9492005730046857e-07,
|
| 116251 |
+
"loss": 0.7534,
|
| 116252 |
+
"step": 16606
|
| 116253 |
+
},
|
| 116254 |
+
{
|
| 116255 |
+
"epoch": 18.915099715099714,
|
| 116256 |
+
"grad_norm": 0.20814749598503113,
|
| 116257 |
+
"learning_rate": 3.9409561128317394e-07,
|
| 116258 |
+
"loss": 0.5925,
|
| 116259 |
+
"step": 16607
|
| 116260 |
+
},
|
| 116261 |
+
{
|
| 116262 |
+
"epoch": 18.916239316239317,
|
| 116263 |
+
"grad_norm": 0.20650431513786316,
|
| 116264 |
+
"learning_rate": 3.9327201989660665e-07,
|
| 116265 |
+
"loss": 0.5924,
|
| 116266 |
+
"step": 16608
|
| 116267 |
+
},
|
| 116268 |
+
{
|
| 116269 |
+
"epoch": 18.91737891737892,
|
| 116270 |
+
"grad_norm": 0.1958417445421219,
|
| 116271 |
+
"learning_rate": 3.9244928316937434e-07,
|
| 116272 |
+
"loss": 0.8564,
|
| 116273 |
+
"step": 16609
|
| 116274 |
+
},
|
| 116275 |
+
{
|
| 116276 |
+
"epoch": 18.918518518518518,
|
| 116277 |
+
"grad_norm": 0.1731303185224533,
|
| 116278 |
+
"learning_rate": 3.91627401130043e-07,
|
| 116279 |
+
"loss": 0.4139,
|
| 116280 |
+
"step": 16610
|
| 116281 |
+
},
|
| 116282 |
+
{
|
| 116283 |
+
"epoch": 18.91965811965812,
|
| 116284 |
+
"grad_norm": 0.17427939176559448,
|
| 116285 |
+
"learning_rate": 3.9080637380717043e-07,
|
| 116286 |
+
"loss": 0.8021,
|
| 116287 |
+
"step": 16611
|
| 116288 |
+
},
|
| 116289 |
+
{
|
| 116290 |
+
"epoch": 18.920797720797722,
|
| 116291 |
+
"grad_norm": 0.15428948402404785,
|
| 116292 |
+
"learning_rate": 3.8998620122926434e-07,
|
| 116293 |
+
"loss": 0.6157,
|
| 116294 |
+
"step": 16612
|
| 116295 |
+
},
|
| 116296 |
+
{
|
| 116297 |
+
"epoch": 18.92193732193732,
|
| 116298 |
+
"grad_norm": 0.28682294487953186,
|
| 116299 |
+
"learning_rate": 3.8916688342481853e-07,
|
| 116300 |
+
"loss": 0.6556,
|
| 116301 |
+
"step": 16613
|
| 116302 |
+
},
|
| 116303 |
+
{
|
| 116304 |
+
"epoch": 18.923076923076923,
|
| 116305 |
+
"grad_norm": 0.22330935299396515,
|
| 116306 |
+
"learning_rate": 3.8834842042228536e-07,
|
| 116307 |
+
"loss": 0.6234,
|
| 116308 |
+
"step": 16614
|
| 116309 |
+
},
|
| 116310 |
+
{
|
| 116311 |
+
"epoch": 18.924216524216526,
|
| 116312 |
+
"grad_norm": 0.1999473124742508,
|
| 116313 |
+
"learning_rate": 3.8753081225009205e-07,
|
| 116314 |
+
"loss": 0.68,
|
| 116315 |
+
"step": 16615
|
| 116316 |
+
},
|
| 116317 |
+
{
|
| 116318 |
+
"epoch": 18.925356125356124,
|
| 116319 |
+
"grad_norm": 0.19231605529785156,
|
| 116320 |
+
"learning_rate": 3.8671405893664083e-07,
|
| 116321 |
+
"loss": 0.6057,
|
| 116322 |
+
"step": 16616
|
| 116323 |
+
},
|
| 116324 |
+
{
|
| 116325 |
+
"epoch": 18.926495726495727,
|
| 116326 |
+
"grad_norm": 0.19735603034496307,
|
| 116327 |
+
"learning_rate": 3.8589816051029514e-07,
|
| 116328 |
+
"loss": 0.7346,
|
| 116329 |
+
"step": 16617
|
| 116330 |
+
},
|
| 116331 |
+
{
|
| 116332 |
+
"epoch": 18.92763532763533,
|
| 116333 |
+
"grad_norm": 0.19661226868629456,
|
| 116334 |
+
"learning_rate": 3.8508311699939626e-07,
|
| 116335 |
+
"loss": 0.6048,
|
| 116336 |
+
"step": 16618
|
| 116337 |
+
},
|
| 116338 |
+
{
|
| 116339 |
+
"epoch": 18.928774928774928,
|
| 116340 |
+
"grad_norm": 0.19001834094524384,
|
| 116341 |
+
"learning_rate": 3.8426892843225205e-07,
|
| 116342 |
+
"loss": 0.6328,
|
| 116343 |
+
"step": 16619
|
| 116344 |
+
},
|
| 116345 |
+
{
|
| 116346 |
+
"epoch": 18.92991452991453,
|
| 116347 |
+
"grad_norm": 0.18638885021209717,
|
| 116348 |
+
"learning_rate": 3.8345559483714265e-07,
|
| 116349 |
+
"loss": 0.5536,
|
| 116350 |
+
"step": 16620
|
| 116351 |
+
},
|
| 116352 |
+
{
|
| 116353 |
+
"epoch": 18.931054131054132,
|
| 116354 |
+
"grad_norm": 0.19392959773540497,
|
| 116355 |
+
"learning_rate": 3.8264311624231497e-07,
|
| 116356 |
+
"loss": 0.4132,
|
| 116357 |
+
"step": 16621
|
| 116358 |
+
},
|
| 116359 |
+
{
|
| 116360 |
+
"epoch": 18.93219373219373,
|
| 116361 |
+
"grad_norm": 0.21309886872768402,
|
| 116362 |
+
"learning_rate": 3.8183149267599083e-07,
|
| 116363 |
+
"loss": 0.5396,
|
| 116364 |
+
"step": 16622
|
| 116365 |
+
},
|
| 116366 |
+
{
|
| 116367 |
+
"epoch": 18.933333333333334,
|
| 116368 |
+
"grad_norm": 0.19839166104793549,
|
| 116369 |
+
"learning_rate": 3.8102072416635604e-07,
|
| 116370 |
+
"loss": 0.7381,
|
| 116371 |
+
"step": 16623
|
| 116372 |
+
},
|
| 116373 |
+
{
|
| 116374 |
+
"epoch": 18.934472934472936,
|
| 116375 |
+
"grad_norm": 0.17323803901672363,
|
| 116376 |
+
"learning_rate": 3.8021081074157694e-07,
|
| 116377 |
+
"loss": 0.6108,
|
| 116378 |
+
"step": 16624
|
| 116379 |
+
},
|
| 116380 |
+
{
|
| 116381 |
+
"epoch": 18.935612535612535,
|
| 116382 |
+
"grad_norm": 0.22733454406261444,
|
| 116383 |
+
"learning_rate": 3.7940175242978105e-07,
|
| 116384 |
+
"loss": 0.6199,
|
| 116385 |
+
"step": 16625
|
| 116386 |
+
},
|
| 116387 |
+
{
|
| 116388 |
+
"epoch": 18.936752136752137,
|
| 116389 |
+
"grad_norm": 0.18306003510951996,
|
| 116390 |
+
"learning_rate": 3.785935492590681e-07,
|
| 116391 |
+
"loss": 0.7503,
|
| 116392 |
+
"step": 16626
|
| 116393 |
+
},
|
| 116394 |
+
{
|
| 116395 |
+
"epoch": 18.93789173789174,
|
| 116396 |
+
"grad_norm": 0.20709216594696045,
|
| 116397 |
+
"learning_rate": 3.7778620125751007e-07,
|
| 116398 |
+
"loss": 0.8921,
|
| 116399 |
+
"step": 16627
|
| 116400 |
+
},
|
| 116401 |
+
{
|
| 116402 |
+
"epoch": 18.939031339031338,
|
| 116403 |
+
"grad_norm": 0.19395828247070312,
|
| 116404 |
+
"learning_rate": 3.769797084531512e-07,
|
| 116405 |
+
"loss": 0.737,
|
| 116406 |
+
"step": 16628
|
| 116407 |
+
},
|
| 116408 |
+
{
|
| 116409 |
+
"epoch": 18.94017094017094,
|
| 116410 |
+
"grad_norm": 0.1846737116575241,
|
| 116411 |
+
"learning_rate": 3.761740708739969e-07,
|
| 116412 |
+
"loss": 0.551,
|
| 116413 |
+
"step": 16629
|
| 116414 |
+
},
|
| 116415 |
+
{
|
| 116416 |
+
"epoch": 18.941310541310543,
|
| 116417 |
+
"grad_norm": 0.20248864591121674,
|
| 116418 |
+
"learning_rate": 3.753692885480359e-07,
|
| 116419 |
+
"loss": 0.5887,
|
| 116420 |
+
"step": 16630
|
| 116421 |
+
},
|
| 116422 |
+
{
|
| 116423 |
+
"epoch": 18.94245014245014,
|
| 116424 |
+
"grad_norm": 0.1981780081987381,
|
| 116425 |
+
"learning_rate": 3.745653615032152e-07,
|
| 116426 |
+
"loss": 0.789,
|
| 116427 |
+
"step": 16631
|
| 116428 |
+
},
|
| 116429 |
+
{
|
| 116430 |
+
"epoch": 18.943589743589744,
|
| 116431 |
+
"grad_norm": 0.19893518090248108,
|
| 116432 |
+
"learning_rate": 3.7376228976746254e-07,
|
| 116433 |
+
"loss": 0.6667,
|
| 116434 |
+
"step": 16632
|
| 116435 |
+
},
|
| 116436 |
+
{
|
| 116437 |
+
"epoch": 18.944729344729346,
|
| 116438 |
+
"grad_norm": 0.20437784492969513,
|
| 116439 |
+
"learning_rate": 3.729600733686639e-07,
|
| 116440 |
+
"loss": 0.4562,
|
| 116441 |
+
"step": 16633
|
| 116442 |
+
},
|
| 116443 |
+
{
|
| 116444 |
+
"epoch": 18.945868945868945,
|
| 116445 |
+
"grad_norm": 0.1854739636182785,
|
| 116446 |
+
"learning_rate": 3.7215871233468866e-07,
|
| 116447 |
+
"loss": 0.7083,
|
| 116448 |
+
"step": 16634
|
| 116449 |
+
},
|
| 116450 |
+
{
|
| 116451 |
+
"epoch": 18.947008547008547,
|
| 116452 |
+
"grad_norm": 0.1786850392818451,
|
| 116453 |
+
"learning_rate": 3.713582066933702e-07,
|
| 116454 |
+
"loss": 0.5751,
|
| 116455 |
+
"step": 16635
|
| 116456 |
+
},
|
| 116457 |
+
{
|
| 116458 |
+
"epoch": 18.94814814814815,
|
| 116459 |
+
"grad_norm": 0.1907089501619339,
|
| 116460 |
+
"learning_rate": 3.705585564725084e-07,
|
| 116461 |
+
"loss": 0.7099,
|
| 116462 |
+
"step": 16636
|
| 116463 |
+
},
|
| 116464 |
+
{
|
| 116465 |
+
"epoch": 18.94928774928775,
|
| 116466 |
+
"grad_norm": 0.19957898557186127,
|
| 116467 |
+
"learning_rate": 3.6975976169987826e-07,
|
| 116468 |
+
"loss": 0.5763,
|
| 116469 |
+
"step": 16637
|
| 116470 |
+
},
|
| 116471 |
+
{
|
| 116472 |
+
"epoch": 18.95042735042735,
|
| 116473 |
+
"grad_norm": 0.1790960133075714,
|
| 116474 |
+
"learning_rate": 3.6896182240322705e-07,
|
| 116475 |
+
"loss": 0.6079,
|
| 116476 |
+
"step": 16638
|
| 116477 |
+
},
|
| 116478 |
+
{
|
| 116479 |
+
"epoch": 18.951566951566953,
|
| 116480 |
+
"grad_norm": 0.18549738824367523,
|
| 116481 |
+
"learning_rate": 3.6816473861026603e-07,
|
| 116482 |
+
"loss": 0.6331,
|
| 116483 |
+
"step": 16639
|
| 116484 |
+
},
|
| 116485 |
+
{
|
| 116486 |
+
"epoch": 18.95270655270655,
|
| 116487 |
+
"grad_norm": 0.2153673768043518,
|
| 116488 |
+
"learning_rate": 3.6736851034868125e-07,
|
| 116489 |
+
"loss": 0.5501,
|
| 116490 |
+
"step": 16640
|
| 116491 |
+
},
|
| 116492 |
+
{
|
| 116493 |
+
"epoch": 18.953846153846154,
|
| 116494 |
+
"grad_norm": 0.23373520374298096,
|
| 116495 |
+
"learning_rate": 3.6657313764612846e-07,
|
| 116496 |
+
"loss": 0.6644,
|
| 116497 |
+
"step": 16641
|
| 116498 |
+
},
|
| 116499 |
+
{
|
| 116500 |
+
"epoch": 18.954985754985756,
|
| 116501 |
+
"grad_norm": 0.19167746603488922,
|
| 116502 |
+
"learning_rate": 3.6577862053023273e-07,
|
| 116503 |
+
"loss": 0.7159,
|
| 116504 |
+
"step": 16642
|
| 116505 |
+
},
|
| 116506 |
+
{
|
| 116507 |
+
"epoch": 18.956125356125355,
|
| 116508 |
+
"grad_norm": 0.19353190064430237,
|
| 116509 |
+
"learning_rate": 3.649849590285914e-07,
|
| 116510 |
+
"loss": 0.7332,
|
| 116511 |
+
"step": 16643
|
| 116512 |
+
},
|
| 116513 |
+
{
|
| 116514 |
+
"epoch": 18.957264957264957,
|
| 116515 |
+
"grad_norm": 0.1788264811038971,
|
| 116516 |
+
"learning_rate": 3.6419215316876587e-07,
|
| 116517 |
+
"loss": 0.5823,
|
| 116518 |
+
"step": 16644
|
| 116519 |
+
},
|
| 116520 |
+
{
|
| 116521 |
+
"epoch": 18.95840455840456,
|
| 116522 |
+
"grad_norm": 0.16599953174591064,
|
| 116523 |
+
"learning_rate": 3.634002029782979e-07,
|
| 116524 |
+
"loss": 0.7679,
|
| 116525 |
+
"step": 16645
|
| 116526 |
+
},
|
| 116527 |
+
{
|
| 116528 |
+
"epoch": 18.95954415954416,
|
| 116529 |
+
"grad_norm": 0.20295533537864685,
|
| 116530 |
+
"learning_rate": 3.6260910848469055e-07,
|
| 116531 |
+
"loss": 0.7243,
|
| 116532 |
+
"step": 16646
|
| 116533 |
+
},
|
| 116534 |
+
{
|
| 116535 |
+
"epoch": 18.96068376068376,
|
| 116536 |
+
"grad_norm": 0.18700546026229858,
|
| 116537 |
+
"learning_rate": 3.6181886971542453e-07,
|
| 116538 |
+
"loss": 0.6274,
|
| 116539 |
+
"step": 16647
|
| 116540 |
+
},
|
| 116541 |
+
{
|
| 116542 |
+
"epoch": 18.961823361823363,
|
| 116543 |
+
"grad_norm": 0.1797284036874771,
|
| 116544 |
+
"learning_rate": 3.610294866979419e-07,
|
| 116545 |
+
"loss": 0.7285,
|
| 116546 |
+
"step": 16648
|
| 116547 |
+
},
|
| 116548 |
+
{
|
| 116549 |
+
"epoch": 18.962962962962962,
|
| 116550 |
+
"grad_norm": 0.20255152881145477,
|
| 116551 |
+
"learning_rate": 3.602409594596623e-07,
|
| 116552 |
+
"loss": 0.5016,
|
| 116553 |
+
"step": 16649
|
| 116554 |
+
},
|
| 116555 |
+
{
|
| 116556 |
+
"epoch": 18.964102564102564,
|
| 116557 |
+
"grad_norm": 0.1935027539730072,
|
| 116558 |
+
"learning_rate": 3.59453288027975e-07,
|
| 116559 |
+
"loss": 0.6789,
|
| 116560 |
+
"step": 16650
|
| 116561 |
+
},
|
| 116562 |
+
{
|
| 116563 |
+
"epoch": 18.965242165242167,
|
| 116564 |
+
"grad_norm": 0.17068713903427124,
|
| 116565 |
+
"learning_rate": 3.5866647243023576e-07,
|
| 116566 |
+
"loss": 0.7219,
|
| 116567 |
+
"step": 16651
|
| 116568 |
+
},
|
| 116569 |
+
{
|
| 116570 |
+
"epoch": 18.966381766381765,
|
| 116571 |
+
"grad_norm": 0.20726989209651947,
|
| 116572 |
+
"learning_rate": 3.5788051269377565e-07,
|
| 116573 |
+
"loss": 0.6189,
|
| 116574 |
+
"step": 16652
|
| 116575 |
+
},
|
| 116576 |
+
{
|
| 116577 |
+
"epoch": 18.967521367521368,
|
| 116578 |
+
"grad_norm": 0.17680394649505615,
|
| 116579 |
+
"learning_rate": 3.5709540884588946e-07,
|
| 116580 |
+
"loss": 0.6625,
|
| 116581 |
+
"step": 16653
|
| 116582 |
+
},
|
| 116583 |
+
{
|
| 116584 |
+
"epoch": 18.96866096866097,
|
| 116585 |
+
"grad_norm": 0.2064637690782547,
|
| 116586 |
+
"learning_rate": 3.5631116091384973e-07,
|
| 116587 |
+
"loss": 0.65,
|
| 116588 |
+
"step": 16654
|
| 116589 |
+
},
|
| 116590 |
+
{
|
| 116591 |
+
"epoch": 18.96980056980057,
|
| 116592 |
+
"grad_norm": 0.18331217765808105,
|
| 116593 |
+
"learning_rate": 3.5552776892489313e-07,
|
| 116594 |
+
"loss": 0.7459,
|
| 116595 |
+
"step": 16655
|
| 116596 |
+
},
|
| 116597 |
+
{
|
| 116598 |
+
"epoch": 18.97094017094017,
|
| 116599 |
+
"grad_norm": 0.1848146915435791,
|
| 116600 |
+
"learning_rate": 3.547452329062284e-07,
|
| 116601 |
+
"loss": 0.538,
|
| 116602 |
+
"step": 16656
|
| 116603 |
+
},
|
| 116604 |
+
{
|
| 116605 |
+
"epoch": 18.972079772079773,
|
| 116606 |
+
"grad_norm": 0.19916410744190216,
|
| 116607 |
+
"learning_rate": 3.5396355288503936e-07,
|
| 116608 |
+
"loss": 0.8061,
|
| 116609 |
+
"step": 16657
|
| 116610 |
+
},
|
| 116611 |
+
{
|
| 116612 |
+
"epoch": 18.973219373219372,
|
| 116613 |
+
"grad_norm": 0.23589324951171875,
|
| 116614 |
+
"learning_rate": 3.5318272888847095e-07,
|
| 116615 |
+
"loss": 0.6315,
|
| 116616 |
+
"step": 16658
|
| 116617 |
+
},
|
| 116618 |
+
{
|
| 116619 |
+
"epoch": 18.974358974358974,
|
| 116620 |
+
"grad_norm": 0.172958105802536,
|
| 116621 |
+
"learning_rate": 3.524027609436459e-07,
|
| 116622 |
+
"loss": 0.7962,
|
| 116623 |
+
"step": 16659
|
| 116624 |
+
},
|
| 116625 |
+
{
|
| 116626 |
+
"epoch": 18.975498575498577,
|
| 116627 |
+
"grad_norm": 0.2174021601676941,
|
| 116628 |
+
"learning_rate": 3.516236490776537e-07,
|
| 116629 |
+
"loss": 0.773,
|
| 116630 |
+
"step": 16660
|
| 116631 |
+
},
|
| 116632 |
+
{
|
| 116633 |
+
"epoch": 18.976638176638176,
|
| 116634 |
+
"grad_norm": 0.15929563343524933,
|
| 116635 |
+
"learning_rate": 3.508453933175532e-07,
|
| 116636 |
+
"loss": 0.78,
|
| 116637 |
+
"step": 16661
|
| 116638 |
+
},
|
| 116639 |
+
{
|
| 116640 |
+
"epoch": 18.977777777777778,
|
| 116641 |
+
"grad_norm": 0.20452843606472015,
|
| 116642 |
+
"learning_rate": 3.500679936903811e-07,
|
| 116643 |
+
"loss": 0.642,
|
| 116644 |
+
"step": 16662
|
| 116645 |
+
},
|
| 116646 |
+
{
|
| 116647 |
+
"epoch": 18.97891737891738,
|
| 116648 |
+
"grad_norm": 0.31728649139404297,
|
| 116649 |
+
"learning_rate": 3.492914502231326e-07,
|
| 116650 |
+
"loss": 0.4238,
|
| 116651 |
+
"step": 16663
|
| 116652 |
+
},
|
| 116653 |
+
{
|
| 116654 |
+
"epoch": 18.98005698005698,
|
| 116655 |
+
"grad_norm": 0.20840921998023987,
|
| 116656 |
+
"learning_rate": 3.485157629427832e-07,
|
| 116657 |
+
"loss": 0.8743,
|
| 116658 |
+
"step": 16664
|
| 116659 |
+
},
|
| 116660 |
+
{
|
| 116661 |
+
"epoch": 18.98119658119658,
|
| 116662 |
+
"grad_norm": 0.1814262568950653,
|
| 116663 |
+
"learning_rate": 3.4774093187627253e-07,
|
| 116664 |
+
"loss": 0.5862,
|
| 116665 |
+
"step": 16665
|
| 116666 |
+
},
|
| 116667 |
+
{
|
| 116668 |
+
"epoch": 18.982336182336184,
|
| 116669 |
+
"grad_norm": 0.2093941867351532,
|
| 116670 |
+
"learning_rate": 3.4696695705051796e-07,
|
| 116671 |
+
"loss": 0.774,
|
| 116672 |
+
"step": 16666
|
| 116673 |
+
},
|
| 116674 |
+
{
|
| 116675 |
+
"epoch": 18.983475783475782,
|
| 116676 |
+
"grad_norm": 0.19571354985237122,
|
| 116677 |
+
"learning_rate": 3.461938384923924e-07,
|
| 116678 |
+
"loss": 0.8105,
|
| 116679 |
+
"step": 16667
|
| 116680 |
+
},
|
| 116681 |
+
{
|
| 116682 |
+
"epoch": 18.984615384615385,
|
| 116683 |
+
"grad_norm": 0.1960640549659729,
|
| 116684 |
+
"learning_rate": 3.4542157622875213e-07,
|
| 116685 |
+
"loss": 0.7862,
|
| 116686 |
+
"step": 16668
|
| 116687 |
+
},
|
| 116688 |
+
{
|
| 116689 |
+
"epoch": 18.985754985754987,
|
| 116690 |
+
"grad_norm": 0.16605553030967712,
|
| 116691 |
+
"learning_rate": 3.44650170286423e-07,
|
| 116692 |
+
"loss": 0.843,
|
| 116693 |
+
"step": 16669
|
| 116694 |
+
},
|
| 116695 |
+
{
|
| 116696 |
+
"epoch": 18.986894586894586,
|
| 116697 |
+
"grad_norm": 0.18908195197582245,
|
| 116698 |
+
"learning_rate": 3.4387962069219737e-07,
|
| 116699 |
+
"loss": 0.676,
|
| 116700 |
+
"step": 16670
|
| 116701 |
+
},
|
| 116702 |
+
{
|
| 116703 |
+
"epoch": 18.988034188034188,
|
| 116704 |
+
"grad_norm": 0.20927131175994873,
|
| 116705 |
+
"learning_rate": 3.4310992747283444e-07,
|
| 116706 |
+
"loss": 0.6447,
|
| 116707 |
+
"step": 16671
|
| 116708 |
+
},
|
| 116709 |
+
{
|
| 116710 |
+
"epoch": 18.98917378917379,
|
| 116711 |
+
"grad_norm": 0.2044903039932251,
|
| 116712 |
+
"learning_rate": 3.4234109065506835e-07,
|
| 116713 |
+
"loss": 0.7814,
|
| 116714 |
+
"step": 16672
|
| 116715 |
+
},
|
| 116716 |
+
{
|
| 116717 |
+
"epoch": 18.99031339031339,
|
| 116718 |
+
"grad_norm": 0.19696706533432007,
|
| 116719 |
+
"learning_rate": 3.415731102656083e-07,
|
| 116720 |
+
"loss": 0.8497,
|
| 116721 |
+
"step": 16673
|
| 116722 |
+
},
|
| 116723 |
+
{
|
| 116724 |
+
"epoch": 18.99145299145299,
|
| 116725 |
+
"grad_norm": 0.2360168695449829,
|
| 116726 |
+
"learning_rate": 3.408059863311247e-07,
|
| 116727 |
+
"loss": 0.5818,
|
| 116728 |
+
"step": 16674
|
| 116729 |
+
},
|
| 116730 |
+
{
|
| 116731 |
+
"epoch": 18.992592592592594,
|
| 116732 |
+
"grad_norm": 0.2683320641517639,
|
| 116733 |
+
"learning_rate": 3.400397188782628e-07,
|
| 116734 |
+
"loss": 0.411,
|
| 116735 |
+
"step": 16675
|
| 116736 |
+
},
|
| 116737 |
+
{
|
| 116738 |
+
"epoch": 18.993732193732193,
|
| 116739 |
+
"grad_norm": 0.1961396485567093,
|
| 116740 |
+
"learning_rate": 3.392743079336347e-07,
|
| 116741 |
+
"loss": 0.832,
|
| 116742 |
+
"step": 16676
|
| 116743 |
+
},
|
| 116744 |
+
{
|
| 116745 |
+
"epoch": 18.994871794871795,
|
| 116746 |
+
"grad_norm": 0.1874992400407791,
|
| 116747 |
+
"learning_rate": 3.385097535238302e-07,
|
| 116748 |
+
"loss": 0.5999,
|
| 116749 |
+
"step": 16677
|
| 116750 |
+
},
|
| 116751 |
+
{
|
| 116752 |
+
"epoch": 18.996011396011397,
|
| 116753 |
+
"grad_norm": 0.1800270676612854,
|
| 116754 |
+
"learning_rate": 3.377460556754003e-07,
|
| 116755 |
+
"loss": 0.3464,
|
| 116756 |
+
"step": 16678
|
| 116757 |
+
},
|
| 116758 |
+
{
|
| 116759 |
+
"epoch": 18.997150997150996,
|
| 116760 |
+
"grad_norm": 0.22845672070980072,
|
| 116761 |
+
"learning_rate": 3.369832144148682e-07,
|
| 116762 |
+
"loss": 0.718,
|
| 116763 |
+
"step": 16679
|
| 116764 |
+
},
|
| 116765 |
+
{
|
| 116766 |
+
"epoch": 18.9982905982906,
|
| 116767 |
+
"grad_norm": 0.17252226173877716,
|
| 116768 |
+
"learning_rate": 3.3622122976873506e-07,
|
| 116769 |
+
"loss": 0.7316,
|
| 116770 |
+
"step": 16680
|
| 116771 |
+
},
|
| 116772 |
+
{
|
| 116773 |
+
"epoch": 18.9994301994302,
|
| 116774 |
+
"grad_norm": 0.20764583349227905,
|
| 116775 |
+
"learning_rate": 3.3546010176346564e-07,
|
| 116776 |
+
"loss": 0.467,
|
| 116777 |
+
"step": 16681
|
| 116778 |
+
},
|
| 116779 |
+
{
|
| 116780 |
+
"epoch": 19.0,
|
| 116781 |
+
"grad_norm": 0.37283486127853394,
|
| 116782 |
+
"learning_rate": 3.3469983042549167e-07,
|
| 116783 |
+
"loss": 0.7267,
|
| 116784 |
+
"step": 16682
|
| 116785 |
+
},
|
| 116786 |
+
{
|
| 116787 |
+
"epoch": 19.001139601139602,
|
| 116788 |
+
"grad_norm": 0.16425985097885132,
|
| 116789 |
+
"learning_rate": 3.3394041578122257e-07,
|
| 116790 |
+
"loss": 0.9197,
|
| 116791 |
+
"step": 16683
|
| 116792 |
+
},
|
| 116793 |
+
{
|
| 116794 |
+
"epoch": 19.0022792022792,
|
| 116795 |
+
"grad_norm": 0.1914873868227005,
|
| 116796 |
+
"learning_rate": 3.3318185785703724e-07,
|
| 116797 |
+
"loss": 0.7005,
|
| 116798 |
+
"step": 16684
|
| 116799 |
+
},
|
| 116800 |
+
{
|
| 116801 |
+
"epoch": 19.003418803418803,
|
| 116802 |
+
"grad_norm": 0.24253977835178375,
|
| 116803 |
+
"learning_rate": 3.324241566792785e-07,
|
| 116804 |
+
"loss": 0.3597,
|
| 116805 |
+
"step": 16685
|
| 116806 |
+
},
|
| 116807 |
+
{
|
| 116808 |
+
"epoch": 19.004558404558406,
|
| 116809 |
+
"grad_norm": 0.19261282682418823,
|
| 116810 |
+
"learning_rate": 3.3166731227426693e-07,
|
| 116811 |
+
"loss": 0.733,
|
| 116812 |
+
"step": 16686
|
| 116813 |
+
},
|
| 116814 |
+
{
|
| 116815 |
+
"epoch": 19.005698005698004,
|
| 116816 |
+
"grad_norm": 0.19126534461975098,
|
| 116817 |
+
"learning_rate": 3.3091132466828435e-07,
|
| 116818 |
+
"loss": 0.5217,
|
| 116819 |
+
"step": 16687
|
| 116820 |
+
},
|
| 116821 |
+
{
|
| 116822 |
+
"epoch": 19.006837606837607,
|
| 116823 |
+
"grad_norm": 0.20644229650497437,
|
| 116824 |
+
"learning_rate": 3.3015619388759576e-07,
|
| 116825 |
+
"loss": 0.7747,
|
| 116826 |
+
"step": 16688
|
| 116827 |
+
},
|
| 116828 |
+
{
|
| 116829 |
+
"epoch": 19.00797720797721,
|
| 116830 |
+
"grad_norm": 0.22872857749462128,
|
| 116831 |
+
"learning_rate": 3.2940191995842196e-07,
|
| 116832 |
+
"loss": 0.5749,
|
| 116833 |
+
"step": 16689
|
| 116834 |
+
},
|
| 116835 |
+
{
|
| 116836 |
+
"epoch": 19.009116809116808,
|
| 116837 |
+
"grad_norm": 0.19443026185035706,
|
| 116838 |
+
"learning_rate": 3.286485029069641e-07,
|
| 116839 |
+
"loss": 0.6413,
|
| 116840 |
+
"step": 16690
|
| 116841 |
+
},
|
| 116842 |
+
{
|
| 116843 |
+
"epoch": 19.01025641025641,
|
| 116844 |
+
"grad_norm": 0.20380333065986633,
|
| 116845 |
+
"learning_rate": 3.278959427593903e-07,
|
| 116846 |
+
"loss": 0.8625,
|
| 116847 |
+
"step": 16691
|
| 116848 |
+
},
|
| 116849 |
+
{
|
| 116850 |
+
"epoch": 19.011396011396013,
|
| 116851 |
+
"grad_norm": 0.22462737560272217,
|
| 116852 |
+
"learning_rate": 3.271442395418406e-07,
|
| 116853 |
+
"loss": 0.6351,
|
| 116854 |
+
"step": 16692
|
| 116855 |
+
},
|
| 116856 |
+
{
|
| 116857 |
+
"epoch": 19.01253561253561,
|
| 116858 |
+
"grad_norm": 0.1561097949743271,
|
| 116859 |
+
"learning_rate": 3.263933932804192e-07,
|
| 116860 |
+
"loss": 0.7111,
|
| 116861 |
+
"step": 16693
|
| 116862 |
+
},
|
| 116863 |
+
{
|
| 116864 |
+
"epoch": 19.013675213675214,
|
| 116865 |
+
"grad_norm": 0.2274744063615799,
|
| 116866 |
+
"learning_rate": 3.25643404001208e-07,
|
| 116867 |
+
"loss": 0.4553,
|
| 116868 |
+
"step": 16694
|
| 116869 |
+
},
|
| 116870 |
+
{
|
| 116871 |
+
"epoch": 19.014814814814816,
|
| 116872 |
+
"grad_norm": 0.18747691810131073,
|
| 116873 |
+
"learning_rate": 3.248942717302583e-07,
|
| 116874 |
+
"loss": 0.4617,
|
| 116875 |
+
"step": 16695
|
| 116876 |
+
},
|
| 116877 |
+
{
|
| 116878 |
+
"epoch": 19.015954415954415,
|
| 116879 |
+
"grad_norm": 0.16808752715587616,
|
| 116880 |
+
"learning_rate": 3.2414599649358837e-07,
|
| 116881 |
+
"loss": 0.5426,
|
| 116882 |
+
"step": 16696
|
| 116883 |
+
},
|
| 116884 |
+
{
|
| 116885 |
+
"epoch": 19.017094017094017,
|
| 116886 |
+
"grad_norm": 0.19207409024238586,
|
| 116887 |
+
"learning_rate": 3.233985783171828e-07,
|
| 116888 |
+
"loss": 0.6627,
|
| 116889 |
+
"step": 16697
|
| 116890 |
+
},
|
| 116891 |
+
{
|
| 116892 |
+
"epoch": 19.01823361823362,
|
| 116893 |
+
"grad_norm": 0.17813901603221893,
|
| 116894 |
+
"learning_rate": 3.2265201722700976e-07,
|
| 116895 |
+
"loss": 0.7791,
|
| 116896 |
+
"step": 16698
|
| 116897 |
+
},
|
| 116898 |
+
{
|
| 116899 |
+
"epoch": 19.019373219373218,
|
| 116900 |
+
"grad_norm": 0.17404155433177948,
|
| 116901 |
+
"learning_rate": 3.2190631324899303e-07,
|
| 116902 |
+
"loss": 0.5857,
|
| 116903 |
+
"step": 16699
|
| 116904 |
+
},
|
| 116905 |
+
{
|
| 116906 |
+
"epoch": 19.02051282051282,
|
| 116907 |
+
"grad_norm": 0.1948934644460678,
|
| 116908 |
+
"learning_rate": 3.21161466409034e-07,
|
| 116909 |
+
"loss": 0.7377,
|
| 116910 |
+
"step": 16700
|
| 116911 |
+
},
|
| 116912 |
+
{
|
| 116913 |
+
"epoch": 19.021652421652423,
|
| 116914 |
+
"grad_norm": 0.16898861527442932,
|
| 116915 |
+
"learning_rate": 3.204174767330065e-07,
|
| 116916 |
+
"loss": 0.5093,
|
| 116917 |
+
"step": 16701
|
| 116918 |
+
},
|
| 116919 |
+
{
|
| 116920 |
+
"epoch": 19.02279202279202,
|
| 116921 |
+
"grad_norm": 0.1974274218082428,
|
| 116922 |
+
"learning_rate": 3.1967434424674815e-07,
|
| 116923 |
+
"loss": 0.5356,
|
| 116924 |
+
"step": 16702
|
| 116925 |
+
},
|
| 116926 |
+
{
|
| 116927 |
+
"epoch": 19.023931623931624,
|
| 116928 |
+
"grad_norm": 0.17614935338497162,
|
| 116929 |
+
"learning_rate": 3.1893206897607164e-07,
|
| 116930 |
+
"loss": 0.5183,
|
| 116931 |
+
"step": 16703
|
| 116932 |
+
},
|
| 116933 |
+
{
|
| 116934 |
+
"epoch": 19.025071225071226,
|
| 116935 |
+
"grad_norm": 0.20292778313159943,
|
| 116936 |
+
"learning_rate": 3.1819065094675635e-07,
|
| 116937 |
+
"loss": 0.77,
|
| 116938 |
+
"step": 16704
|
| 116939 |
+
},
|
| 116940 |
+
{
|
| 116941 |
+
"epoch": 19.026210826210825,
|
| 116942 |
+
"grad_norm": 0.20143486559391022,
|
| 116943 |
+
"learning_rate": 3.1745009018455396e-07,
|
| 116944 |
+
"loss": 0.4547,
|
| 116945 |
+
"step": 16705
|
| 116946 |
+
},
|
| 116947 |
+
{
|
| 116948 |
+
"epoch": 19.027350427350427,
|
| 116949 |
+
"grad_norm": 0.15823499858379364,
|
| 116950 |
+
"learning_rate": 3.1671038671518825e-07,
|
| 116951 |
+
"loss": 0.7027,
|
| 116952 |
+
"step": 16706
|
| 116953 |
+
},
|
| 116954 |
+
{
|
| 116955 |
+
"epoch": 19.02849002849003,
|
| 116956 |
+
"grad_norm": 0.19536300003528595,
|
| 116957 |
+
"learning_rate": 3.159715405643526e-07,
|
| 116958 |
+
"loss": 0.7036,
|
| 116959 |
+
"step": 16707
|
| 116960 |
+
},
|
| 116961 |
+
{
|
| 116962 |
+
"epoch": 19.02962962962963,
|
| 116963 |
+
"grad_norm": 0.15541143715381622,
|
| 116964 |
+
"learning_rate": 3.152335517577043e-07,
|
| 116965 |
+
"loss": 0.8638,
|
| 116966 |
+
"step": 16708
|
| 116967 |
+
},
|
| 116968 |
+
{
|
| 116969 |
+
"epoch": 19.03076923076923,
|
| 116970 |
+
"grad_norm": 0.22746704518795013,
|
| 116971 |
+
"learning_rate": 3.144964203208783e-07,
|
| 116972 |
+
"loss": 0.6358,
|
| 116973 |
+
"step": 16709
|
| 116974 |
+
},
|
| 116975 |
+
{
|
| 116976 |
+
"epoch": 19.031908831908833,
|
| 116977 |
+
"grad_norm": 0.18629853427410126,
|
| 116978 |
+
"learning_rate": 3.137601462794765e-07,
|
| 116979 |
+
"loss": 0.7183,
|
| 116980 |
+
"step": 16710
|
| 116981 |
+
},
|
| 116982 |
+
{
|
| 116983 |
+
"epoch": 19.03304843304843,
|
| 116984 |
+
"grad_norm": 0.2248799353837967,
|
| 116985 |
+
"learning_rate": 3.1302472965907547e-07,
|
| 116986 |
+
"loss": 0.4789,
|
| 116987 |
+
"step": 16711
|
| 116988 |
+
},
|
| 116989 |
+
{
|
| 116990 |
+
"epoch": 19.034188034188034,
|
| 116991 |
+
"grad_norm": 0.2067008763551712,
|
| 116992 |
+
"learning_rate": 3.122901704852133e-07,
|
| 116993 |
+
"loss": 0.5447,
|
| 116994 |
+
"step": 16712
|
| 116995 |
+
},
|
| 116996 |
+
{
|
| 116997 |
+
"epoch": 19.035327635327636,
|
| 116998 |
+
"grad_norm": 0.24397136270999908,
|
| 116999 |
+
"learning_rate": 3.1155646878340274e-07,
|
| 117000 |
+
"loss": 0.716,
|
| 117001 |
+
"step": 16713
|
| 117002 |
+
},
|
| 117003 |
+
{
|
| 117004 |
+
"epoch": 19.036467236467235,
|
| 117005 |
+
"grad_norm": 0.1896265745162964,
|
| 117006 |
+
"learning_rate": 3.108236245791318e-07,
|
| 117007 |
+
"loss": 0.758,
|
| 117008 |
+
"step": 16714
|
| 117009 |
+
},
|
| 117010 |
+
{
|
| 117011 |
+
"epoch": 19.037606837606837,
|
| 117012 |
+
"grad_norm": 0.20399513840675354,
|
| 117013 |
+
"learning_rate": 3.1009163789785244e-07,
|
| 117014 |
+
"loss": 0.7326,
|
| 117015 |
+
"step": 16715
|
| 117016 |
+
},
|
| 117017 |
+
{
|
| 117018 |
+
"epoch": 19.03874643874644,
|
| 117019 |
+
"grad_norm": 0.20660154521465302,
|
| 117020 |
+
"learning_rate": 3.0936050876498314e-07,
|
| 117021 |
+
"loss": 0.6979,
|
| 117022 |
+
"step": 16716
|
| 117023 |
+
},
|
| 117024 |
+
{
|
| 117025 |
+
"epoch": 19.03988603988604,
|
| 117026 |
+
"grad_norm": 0.16227521002292633,
|
| 117027 |
+
"learning_rate": 3.0863023720592577e-07,
|
| 117028 |
+
"loss": 0.573,
|
| 117029 |
+
"step": 16717
|
| 117030 |
+
},
|
| 117031 |
+
{
|
| 117032 |
+
"epoch": 19.04102564102564,
|
| 117033 |
+
"grad_norm": 0.18760254979133606,
|
| 117034 |
+
"learning_rate": 3.079008232460462e-07,
|
| 117035 |
+
"loss": 0.7601,
|
| 117036 |
+
"step": 16718
|
| 117037 |
+
},
|
| 117038 |
+
{
|
| 117039 |
+
"epoch": 19.042165242165243,
|
| 117040 |
+
"grad_norm": 0.20014169812202454,
|
| 117041 |
+
"learning_rate": 3.071722669106686e-07,
|
| 117042 |
+
"loss": 0.8153,
|
| 117043 |
+
"step": 16719
|
| 117044 |
+
},
|
| 117045 |
+
{
|
| 117046 |
+
"epoch": 19.043304843304842,
|
| 117047 |
+
"grad_norm": 0.17412996292114258,
|
| 117048 |
+
"learning_rate": 3.0644456822510603e-07,
|
| 117049 |
+
"loss": 0.4703,
|
| 117050 |
+
"step": 16720
|
| 117051 |
+
},
|
| 117052 |
+
{
|
| 117053 |
+
"epoch": 19.044444444444444,
|
| 117054 |
+
"grad_norm": 0.2562756836414337,
|
| 117055 |
+
"learning_rate": 3.0571772721462997e-07,
|
| 117056 |
+
"loss": 0.8207,
|
| 117057 |
+
"step": 16721
|
| 117058 |
+
},
|
| 117059 |
+
{
|
| 117060 |
+
"epoch": 19.045584045584047,
|
| 117061 |
+
"grad_norm": 0.18322330713272095,
|
| 117062 |
+
"learning_rate": 3.049917439044897e-07,
|
| 117063 |
+
"loss": 0.6766,
|
| 117064 |
+
"step": 16722
|
| 117065 |
+
},
|
| 117066 |
+
{
|
| 117067 |
+
"epoch": 19.046723646723645,
|
| 117068 |
+
"grad_norm": 0.21178914606571198,
|
| 117069 |
+
"learning_rate": 3.0426661831989557e-07,
|
| 117070 |
+
"loss": 0.7312,
|
| 117071 |
+
"step": 16723
|
| 117072 |
+
},
|
| 117073 |
+
{
|
| 117074 |
+
"epoch": 19.047863247863248,
|
| 117075 |
+
"grad_norm": 0.18006394803524017,
|
| 117076 |
+
"learning_rate": 3.0354235048603574e-07,
|
| 117077 |
+
"loss": 0.6363,
|
| 117078 |
+
"step": 16724
|
| 117079 |
+
},
|
| 117080 |
+
{
|
| 117081 |
+
"epoch": 19.04900284900285,
|
| 117082 |
+
"grad_norm": 0.15349280834197998,
|
| 117083 |
+
"learning_rate": 3.028189404280651e-07,
|
| 117084 |
+
"loss": 0.6743,
|
| 117085 |
+
"step": 16725
|
| 117086 |
+
},
|
| 117087 |
+
{
|
| 117088 |
+
"epoch": 19.05014245014245,
|
| 117089 |
+
"grad_norm": 0.19188684225082397,
|
| 117090 |
+
"learning_rate": 3.0209638817111364e-07,
|
| 117091 |
+
"loss": 0.6271,
|
| 117092 |
+
"step": 16726
|
| 117093 |
+
},
|
| 117094 |
+
{
|
| 117095 |
+
"epoch": 19.05128205128205,
|
| 117096 |
+
"grad_norm": 0.22258351743221283,
|
| 117097 |
+
"learning_rate": 3.013746937402667e-07,
|
| 117098 |
+
"loss": 0.6821,
|
| 117099 |
+
"step": 16727
|
| 117100 |
+
},
|
| 117101 |
+
{
|
| 117102 |
+
"epoch": 19.052421652421653,
|
| 117103 |
+
"grad_norm": 0.16944876313209534,
|
| 117104 |
+
"learning_rate": 3.006538571606043e-07,
|
| 117105 |
+
"loss": 0.538,
|
| 117106 |
+
"step": 16728
|
| 117107 |
+
},
|
| 117108 |
+
{
|
| 117109 |
+
"epoch": 19.053561253561252,
|
| 117110 |
+
"grad_norm": 0.18892964720726013,
|
| 117111 |
+
"learning_rate": 2.999338784571565e-07,
|
| 117112 |
+
"loss": 0.5521,
|
| 117113 |
+
"step": 16729
|
| 117114 |
+
},
|
| 117115 |
+
{
|
| 117116 |
+
"epoch": 19.054700854700855,
|
| 117117 |
+
"grad_norm": 0.16914568841457367,
|
| 117118 |
+
"learning_rate": 2.9921475765492814e-07,
|
| 117119 |
+
"loss": 0.5762,
|
| 117120 |
+
"step": 16730
|
| 117121 |
+
},
|
| 117122 |
+
{
|
| 117123 |
+
"epoch": 19.055840455840457,
|
| 117124 |
+
"grad_norm": 0.2065064162015915,
|
| 117125 |
+
"learning_rate": 2.984964947788993e-07,
|
| 117126 |
+
"loss": 0.651,
|
| 117127 |
+
"step": 16731
|
| 117128 |
+
},
|
| 117129 |
+
{
|
| 117130 |
+
"epoch": 19.056980056980056,
|
| 117131 |
+
"grad_norm": 0.17748785018920898,
|
| 117132 |
+
"learning_rate": 2.977790898540167e-07,
|
| 117133 |
+
"loss": 0.7335,
|
| 117134 |
+
"step": 16732
|
| 117135 |
+
},
|
| 117136 |
+
{
|
| 117137 |
+
"epoch": 19.058119658119658,
|
| 117138 |
+
"grad_norm": 0.1709255576133728,
|
| 117139 |
+
"learning_rate": 2.970625429051993e-07,
|
| 117140 |
+
"loss": 0.6767,
|
| 117141 |
+
"step": 16733
|
| 117142 |
+
},
|
| 117143 |
+
{
|
| 117144 |
+
"epoch": 19.05925925925926,
|
| 117145 |
+
"grad_norm": 0.17027568817138672,
|
| 117146 |
+
"learning_rate": 2.9634685395733e-07,
|
| 117147 |
+
"loss": 0.8709,
|
| 117148 |
+
"step": 16734
|
| 117149 |
+
},
|
| 117150 |
+
{
|
| 117151 |
+
"epoch": 19.06039886039886,
|
| 117152 |
+
"grad_norm": 0.2431151568889618,
|
| 117153 |
+
"learning_rate": 2.9563202303527213e-07,
|
| 117154 |
+
"loss": 0.5081,
|
| 117155 |
+
"step": 16735
|
| 117156 |
+
},
|
| 117157 |
+
{
|
| 117158 |
+
"epoch": 19.06153846153846,
|
| 117159 |
+
"grad_norm": 0.1895310878753662,
|
| 117160 |
+
"learning_rate": 2.9491805016385044e-07,
|
| 117161 |
+
"loss": 0.7936,
|
| 117162 |
+
"step": 16736
|
| 117163 |
+
},
|
| 117164 |
+
{
|
| 117165 |
+
"epoch": 19.062678062678064,
|
| 117166 |
+
"grad_norm": 0.15086257457733154,
|
| 117167 |
+
"learning_rate": 2.9420493536786443e-07,
|
| 117168 |
+
"loss": 0.6619,
|
| 117169 |
+
"step": 16737
|
| 117170 |
+
},
|
| 117171 |
+
{
|
| 117172 |
+
"epoch": 19.063817663817662,
|
| 117173 |
+
"grad_norm": 0.18453842401504517,
|
| 117174 |
+
"learning_rate": 2.934926786720832e-07,
|
| 117175 |
+
"loss": 0.6104,
|
| 117176 |
+
"step": 16738
|
| 117177 |
+
},
|
| 117178 |
+
{
|
| 117179 |
+
"epoch": 19.064957264957265,
|
| 117180 |
+
"grad_norm": 0.15260860323905945,
|
| 117181 |
+
"learning_rate": 2.927812801012425e-07,
|
| 117182 |
+
"loss": 0.6298,
|
| 117183 |
+
"step": 16739
|
| 117184 |
+
},
|
| 117185 |
+
{
|
| 117186 |
+
"epoch": 19.066096866096867,
|
| 117187 |
+
"grad_norm": 0.22402848303318024,
|
| 117188 |
+
"learning_rate": 2.920707396800532e-07,
|
| 117189 |
+
"loss": 0.5562,
|
| 117190 |
+
"step": 16740
|
| 117191 |
+
},
|
| 117192 |
+
{
|
| 117193 |
+
"epoch": 19.067236467236466,
|
| 117194 |
+
"grad_norm": 0.21188044548034668,
|
| 117195 |
+
"learning_rate": 2.913610574331954e-07,
|
| 117196 |
+
"loss": 0.646,
|
| 117197 |
+
"step": 16741
|
| 117198 |
+
},
|
| 117199 |
+
{
|
| 117200 |
+
"epoch": 19.068376068376068,
|
| 117201 |
+
"grad_norm": 0.22330155968666077,
|
| 117202 |
+
"learning_rate": 2.906522333853162e-07,
|
| 117203 |
+
"loss": 0.5249,
|
| 117204 |
+
"step": 16742
|
| 117205 |
+
},
|
| 117206 |
+
{
|
| 117207 |
+
"epoch": 19.06951566951567,
|
| 117208 |
+
"grad_norm": 0.20336827635765076,
|
| 117209 |
+
"learning_rate": 2.8994426756103755e-07,
|
| 117210 |
+
"loss": 0.6334,
|
| 117211 |
+
"step": 16743
|
| 117212 |
+
},
|
| 117213 |
+
{
|
| 117214 |
+
"epoch": 19.07065527065527,
|
| 117215 |
+
"grad_norm": 0.1761748343706131,
|
| 117216 |
+
"learning_rate": 2.892371599849453e-07,
|
| 117217 |
+
"loss": 0.7732,
|
| 117218 |
+
"step": 16744
|
| 117219 |
+
},
|
| 117220 |
+
{
|
| 117221 |
+
"epoch": 19.07179487179487,
|
| 117222 |
+
"grad_norm": 0.16703934967517853,
|
| 117223 |
+
"learning_rate": 2.885309106816031e-07,
|
| 117224 |
+
"loss": 0.8338,
|
| 117225 |
+
"step": 16745
|
| 117226 |
+
},
|
| 117227 |
+
{
|
| 117228 |
+
"epoch": 19.072934472934474,
|
| 117229 |
+
"grad_norm": 0.18746022880077362,
|
| 117230 |
+
"learning_rate": 2.8782551967553863e-07,
|
| 117231 |
+
"loss": 0.8131,
|
| 117232 |
+
"step": 16746
|
| 117233 |
+
},
|
| 117234 |
+
{
|
| 117235 |
+
"epoch": 19.074074074074073,
|
| 117236 |
+
"grad_norm": 0.20968621969223022,
|
| 117237 |
+
"learning_rate": 2.8712098699125166e-07,
|
| 117238 |
+
"loss": 0.5618,
|
| 117239 |
+
"step": 16747
|
| 117240 |
+
},
|
| 117241 |
+
{
|
| 117242 |
+
"epoch": 19.075213675213675,
|
| 117243 |
+
"grad_norm": 0.25017520785331726,
|
| 117244 |
+
"learning_rate": 2.864173126532144e-07,
|
| 117245 |
+
"loss": 0.6002,
|
| 117246 |
+
"step": 16748
|
| 117247 |
+
},
|
| 117248 |
+
{
|
| 117249 |
+
"epoch": 19.076353276353277,
|
| 117250 |
+
"grad_norm": 0.18701662123203278,
|
| 117251 |
+
"learning_rate": 2.857144966858655e-07,
|
| 117252 |
+
"loss": 0.4895,
|
| 117253 |
+
"step": 16749
|
| 117254 |
+
},
|
| 117255 |
+
{
|
| 117256 |
+
"epoch": 19.077492877492876,
|
| 117257 |
+
"grad_norm": 0.20848360657691956,
|
| 117258 |
+
"learning_rate": 2.85012539113616e-07,
|
| 117259 |
+
"loss": 0.6475,
|
| 117260 |
+
"step": 16750
|
| 117261 |
+
},
|
| 117262 |
+
{
|
| 117263 |
+
"epoch": 19.07863247863248,
|
| 117264 |
+
"grad_norm": 0.2461550384759903,
|
| 117265 |
+
"learning_rate": 2.843114399608493e-07,
|
| 117266 |
+
"loss": 0.4977,
|
| 117267 |
+
"step": 16751
|
| 117268 |
+
},
|
| 117269 |
+
{
|
| 117270 |
+
"epoch": 19.07977207977208,
|
| 117271 |
+
"grad_norm": 0.18037942051887512,
|
| 117272 |
+
"learning_rate": 2.8361119925191527e-07,
|
| 117273 |
+
"loss": 0.7748,
|
| 117274 |
+
"step": 16752
|
| 117275 |
+
},
|
| 117276 |
+
{
|
| 117277 |
+
"epoch": 19.08091168091168,
|
| 117278 |
+
"grad_norm": 0.18490144610404968,
|
| 117279 |
+
"learning_rate": 2.8291181701113336e-07,
|
| 117280 |
+
"loss": 0.5969,
|
| 117281 |
+
"step": 16753
|
| 117282 |
+
},
|
| 117283 |
+
{
|
| 117284 |
+
"epoch": 19.08205128205128,
|
| 117285 |
+
"grad_norm": 0.1518007069826126,
|
| 117286 |
+
"learning_rate": 2.822132932627952e-07,
|
| 117287 |
+
"loss": 0.672,
|
| 117288 |
+
"step": 16754
|
| 117289 |
+
},
|
| 117290 |
+
{
|
| 117291 |
+
"epoch": 19.083190883190884,
|
| 117292 |
+
"grad_norm": 0.16539475321769714,
|
| 117293 |
+
"learning_rate": 2.8151562803116485e-07,
|
| 117294 |
+
"loss": 0.7403,
|
| 117295 |
+
"step": 16755
|
| 117296 |
+
},
|
| 117297 |
+
{
|
| 117298 |
+
"epoch": 19.084330484330483,
|
| 117299 |
+
"grad_norm": 0.19967080652713776,
|
| 117300 |
+
"learning_rate": 2.8081882134047554e-07,
|
| 117301 |
+
"loss": 0.7216,
|
| 117302 |
+
"step": 16756
|
| 117303 |
+
},
|
| 117304 |
+
{
|
| 117305 |
+
"epoch": 19.085470085470085,
|
| 117306 |
+
"grad_norm": 0.17736263573169708,
|
| 117307 |
+
"learning_rate": 2.801228732149247e-07,
|
| 117308 |
+
"loss": 0.5971,
|
| 117309 |
+
"step": 16757
|
| 117310 |
+
},
|
| 117311 |
+
{
|
| 117312 |
+
"epoch": 19.086609686609687,
|
| 117313 |
+
"grad_norm": 0.1553797423839569,
|
| 117314 |
+
"learning_rate": 2.7942778367868463e-07,
|
| 117315 |
+
"loss": 0.7746,
|
| 117316 |
+
"step": 16758
|
| 117317 |
+
},
|
| 117318 |
+
{
|
| 117319 |
+
"epoch": 19.087749287749286,
|
| 117320 |
+
"grad_norm": 0.2018582671880722,
|
| 117321 |
+
"learning_rate": 2.7873355275589985e-07,
|
| 117322 |
+
"loss": 0.7954,
|
| 117323 |
+
"step": 16759
|
| 117324 |
+
},
|
| 117325 |
+
{
|
| 117326 |
+
"epoch": 19.08888888888889,
|
| 117327 |
+
"grad_norm": 0.2148551046848297,
|
| 117328 |
+
"learning_rate": 2.780401804706845e-07,
|
| 117329 |
+
"loss": 0.7441,
|
| 117330 |
+
"step": 16760
|
| 117331 |
+
},
|
| 117332 |
+
{
|
| 117333 |
+
"epoch": 19.09002849002849,
|
| 117334 |
+
"grad_norm": 0.2104743868112564,
|
| 117335 |
+
"learning_rate": 2.773476668471164e-07,
|
| 117336 |
+
"loss": 0.6336,
|
| 117337 |
+
"step": 16761
|
| 117338 |
+
},
|
| 117339 |
+
{
|
| 117340 |
+
"epoch": 19.09116809116809,
|
| 117341 |
+
"grad_norm": 0.26149600744247437,
|
| 117342 |
+
"learning_rate": 2.766560119092515e-07,
|
| 117343 |
+
"loss": 0.3598,
|
| 117344 |
+
"step": 16762
|
| 117345 |
+
},
|
| 117346 |
+
{
|
| 117347 |
+
"epoch": 19.092307692307692,
|
| 117348 |
+
"grad_norm": 0.1936904639005661,
|
| 117349 |
+
"learning_rate": 2.7596521568111487e-07,
|
| 117350 |
+
"loss": 0.6437,
|
| 117351 |
+
"step": 16763
|
| 117352 |
+
},
|
| 117353 |
+
{
|
| 117354 |
+
"epoch": 19.093447293447294,
|
| 117355 |
+
"grad_norm": 0.18678182363510132,
|
| 117356 |
+
"learning_rate": 2.7527527818669854e-07,
|
| 117357 |
+
"loss": 0.753,
|
| 117358 |
+
"step": 16764
|
| 117359 |
+
},
|
| 117360 |
+
{
|
| 117361 |
+
"epoch": 19.094586894586893,
|
| 117362 |
+
"grad_norm": 0.196497842669487,
|
| 117363 |
+
"learning_rate": 2.7458619944996376e-07,
|
| 117364 |
+
"loss": 0.7477,
|
| 117365 |
+
"step": 16765
|
| 117366 |
+
},
|
| 117367 |
+
{
|
| 117368 |
+
"epoch": 19.095726495726495,
|
| 117369 |
+
"grad_norm": 0.16993390023708344,
|
| 117370 |
+
"learning_rate": 2.738979794948443e-07,
|
| 117371 |
+
"loss": 0.7557,
|
| 117372 |
+
"step": 16766
|
| 117373 |
+
},
|
| 117374 |
+
{
|
| 117375 |
+
"epoch": 19.096866096866098,
|
| 117376 |
+
"grad_norm": 0.1916753053665161,
|
| 117377 |
+
"learning_rate": 2.73210618345246e-07,
|
| 117378 |
+
"loss": 0.6982,
|
| 117379 |
+
"step": 16767
|
| 117380 |
+
},
|
| 117381 |
+
{
|
| 117382 |
+
"epoch": 19.098005698005696,
|
| 117383 |
+
"grad_norm": 0.18876004219055176,
|
| 117384 |
+
"learning_rate": 2.725241160250414e-07,
|
| 117385 |
+
"loss": 0.8045,
|
| 117386 |
+
"step": 16768
|
| 117387 |
+
},
|
| 117388 |
+
{
|
| 117389 |
+
"epoch": 19.0991452991453,
|
| 117390 |
+
"grad_norm": 0.1865483969449997,
|
| 117391 |
+
"learning_rate": 2.718384725580753e-07,
|
| 117392 |
+
"loss": 0.7067,
|
| 117393 |
+
"step": 16769
|
| 117394 |
+
},
|
| 117395 |
+
{
|
| 117396 |
+
"epoch": 19.1002849002849,
|
| 117397 |
+
"grad_norm": 0.15715231001377106,
|
| 117398 |
+
"learning_rate": 2.7115368796816196e-07,
|
| 117399 |
+
"loss": 0.6819,
|
| 117400 |
+
"step": 16770
|
| 117401 |
+
},
|
| 117402 |
+
{
|
| 117403 |
+
"epoch": 19.1014245014245,
|
| 117404 |
+
"grad_norm": 0.23827806115150452,
|
| 117405 |
+
"learning_rate": 2.7046976227908803e-07,
|
| 117406 |
+
"loss": 0.6992,
|
| 117407 |
+
"step": 16771
|
| 117408 |
+
},
|
| 117409 |
+
{
|
| 117410 |
+
"epoch": 19.102564102564102,
|
| 117411 |
+
"grad_norm": 0.1858184039592743,
|
| 117412 |
+
"learning_rate": 2.697866955146011e-07,
|
| 117413 |
+
"loss": 0.686,
|
| 117414 |
+
"step": 16772
|
| 117415 |
+
},
|
| 117416 |
+
{
|
| 117417 |
+
"epoch": 19.103703703703705,
|
| 117418 |
+
"grad_norm": 0.20208309590816498,
|
| 117419 |
+
"learning_rate": 2.691044876984322e-07,
|
| 117420 |
+
"loss": 0.3459,
|
| 117421 |
+
"step": 16773
|
| 117422 |
+
},
|
| 117423 |
+
{
|
| 117424 |
+
"epoch": 19.104843304843303,
|
| 117425 |
+
"grad_norm": 0.20424501597881317,
|
| 117426 |
+
"learning_rate": 2.6842313885427626e-07,
|
| 117427 |
+
"loss": 0.5818,
|
| 117428 |
+
"step": 16774
|
| 117429 |
+
},
|
| 117430 |
+
{
|
| 117431 |
+
"epoch": 19.105982905982906,
|
| 117432 |
+
"grad_norm": 0.20619229972362518,
|
| 117433 |
+
"learning_rate": 2.6774264900579494e-07,
|
| 117434 |
+
"loss": 0.5573,
|
| 117435 |
+
"step": 16775
|
| 117436 |
+
},
|
| 117437 |
+
{
|
| 117438 |
+
"epoch": 19.107122507122508,
|
| 117439 |
+
"grad_norm": 0.18667425215244293,
|
| 117440 |
+
"learning_rate": 2.670630181766276e-07,
|
| 117441 |
+
"loss": 0.7136,
|
| 117442 |
+
"step": 16776
|
| 117443 |
+
},
|
| 117444 |
+
{
|
| 117445 |
+
"epoch": 19.108262108262107,
|
| 117446 |
+
"grad_norm": 0.21457980573177338,
|
| 117447 |
+
"learning_rate": 2.6638424639037486e-07,
|
| 117448 |
+
"loss": 0.5283,
|
| 117449 |
+
"step": 16777
|
| 117450 |
+
},
|
| 117451 |
+
{
|
| 117452 |
+
"epoch": 19.10940170940171,
|
| 117453 |
+
"grad_norm": 0.21677808463573456,
|
| 117454 |
+
"learning_rate": 2.657063336706178e-07,
|
| 117455 |
+
"loss": 0.671,
|
| 117456 |
+
"step": 16778
|
| 117457 |
+
},
|
| 117458 |
+
{
|
| 117459 |
+
"epoch": 19.11054131054131,
|
| 117460 |
+
"grad_norm": 0.25177788734436035,
|
| 117461 |
+
"learning_rate": 2.65029280040896e-07,
|
| 117462 |
+
"loss": 0.5678,
|
| 117463 |
+
"step": 16779
|
| 117464 |
+
},
|
| 117465 |
+
{
|
| 117466 |
+
"epoch": 19.11168091168091,
|
| 117467 |
+
"grad_norm": 0.19061064720153809,
|
| 117468 |
+
"learning_rate": 2.643530855247323e-07,
|
| 117469 |
+
"loss": 0.5776,
|
| 117470 |
+
"step": 16780
|
| 117471 |
+
},
|
| 117472 |
+
{
|
| 117473 |
+
"epoch": 19.112820512820512,
|
| 117474 |
+
"grad_norm": 0.19353726506233215,
|
| 117475 |
+
"learning_rate": 2.6367775014560505e-07,
|
| 117476 |
+
"loss": 0.7864,
|
| 117477 |
+
"step": 16781
|
| 117478 |
+
},
|
| 117479 |
+
{
|
| 117480 |
+
"epoch": 19.113960113960115,
|
| 117481 |
+
"grad_norm": 0.21912068128585815,
|
| 117482 |
+
"learning_rate": 2.6300327392697886e-07,
|
| 117483 |
+
"loss": 0.4603,
|
| 117484 |
+
"step": 16782
|
| 117485 |
+
},
|
| 117486 |
+
{
|
| 117487 |
+
"epoch": 19.115099715099714,
|
| 117488 |
+
"grad_norm": 0.17616406083106995,
|
| 117489 |
+
"learning_rate": 2.6232965689227395e-07,
|
| 117490 |
+
"loss": 0.8158,
|
| 117491 |
+
"step": 16783
|
| 117492 |
+
},
|
| 117493 |
+
{
|
| 117494 |
+
"epoch": 19.116239316239316,
|
| 117495 |
+
"grad_norm": 0.21496127545833588,
|
| 117496 |
+
"learning_rate": 2.6165689906488823e-07,
|
| 117497 |
+
"loss": 0.6297,
|
| 117498 |
+
"step": 16784
|
| 117499 |
+
},
|
| 117500 |
+
{
|
| 117501 |
+
"epoch": 19.117378917378918,
|
| 117502 |
+
"grad_norm": 0.25769445300102234,
|
| 117503 |
+
"learning_rate": 2.609850004681891e-07,
|
| 117504 |
+
"loss": 0.4489,
|
| 117505 |
+
"step": 16785
|
| 117506 |
+
},
|
| 117507 |
+
{
|
| 117508 |
+
"epoch": 19.118518518518517,
|
| 117509 |
+
"grad_norm": 0.20533311367034912,
|
| 117510 |
+
"learning_rate": 2.603139611255162e-07,
|
| 117511 |
+
"loss": 0.51,
|
| 117512 |
+
"step": 16786
|
| 117513 |
+
},
|
| 117514 |
+
{
|
| 117515 |
+
"epoch": 19.11965811965812,
|
| 117516 |
+
"grad_norm": 0.1962464451789856,
|
| 117517 |
+
"learning_rate": 2.596437810601704e-07,
|
| 117518 |
+
"loss": 0.7582,
|
| 117519 |
+
"step": 16787
|
| 117520 |
+
},
|
| 117521 |
+
{
|
| 117522 |
+
"epoch": 19.12079772079772,
|
| 117523 |
+
"grad_norm": 0.16168759763240814,
|
| 117524 |
+
"learning_rate": 2.5897446029543305e-07,
|
| 117525 |
+
"loss": 0.7717,
|
| 117526 |
+
"step": 16788
|
| 117527 |
+
},
|
| 117528 |
+
{
|
| 117529 |
+
"epoch": 19.12193732193732,
|
| 117530 |
+
"grad_norm": 0.21514791250228882,
|
| 117531 |
+
"learning_rate": 2.583059988545522e-07,
|
| 117532 |
+
"loss": 0.5402,
|
| 117533 |
+
"step": 16789
|
| 117534 |
+
},
|
| 117535 |
+
{
|
| 117536 |
+
"epoch": 19.123076923076923,
|
| 117537 |
+
"grad_norm": 0.1739581674337387,
|
| 117538 |
+
"learning_rate": 2.576383967607454e-07,
|
| 117539 |
+
"loss": 0.9189,
|
| 117540 |
+
"step": 16790
|
| 117541 |
+
},
|
| 117542 |
+
{
|
| 117543 |
+
"epoch": 19.124216524216525,
|
| 117544 |
+
"grad_norm": 0.2398281991481781,
|
| 117545 |
+
"learning_rate": 2.5697165403719694e-07,
|
| 117546 |
+
"loss": 0.531,
|
| 117547 |
+
"step": 16791
|
| 117548 |
+
},
|
| 117549 |
+
{
|
| 117550 |
+
"epoch": 19.125356125356124,
|
| 117551 |
+
"grad_norm": 0.21551766991615295,
|
| 117552 |
+
"learning_rate": 2.5630577070706595e-07,
|
| 117553 |
+
"loss": 0.5487,
|
| 117554 |
+
"step": 16792
|
| 117555 |
+
},
|
| 117556 |
+
{
|
| 117557 |
+
"epoch": 19.126495726495726,
|
| 117558 |
+
"grad_norm": 0.19920246303081512,
|
| 117559 |
+
"learning_rate": 2.556407467934813e-07,
|
| 117560 |
+
"loss": 0.5966,
|
| 117561 |
+
"step": 16793
|
| 117562 |
+
},
|
| 117563 |
+
{
|
| 117564 |
+
"epoch": 19.12763532763533,
|
| 117565 |
+
"grad_norm": 0.1649751216173172,
|
| 117566 |
+
"learning_rate": 2.54976582319541e-07,
|
| 117567 |
+
"loss": 0.8289,
|
| 117568 |
+
"step": 16794
|
| 117569 |
+
},
|
| 117570 |
+
{
|
| 117571 |
+
"epoch": 19.128774928774927,
|
| 117572 |
+
"grad_norm": 0.23176871240139008,
|
| 117573 |
+
"learning_rate": 2.543132773083129e-07,
|
| 117574 |
+
"loss": 0.694,
|
| 117575 |
+
"step": 16795
|
| 117576 |
+
},
|
| 117577 |
+
{
|
| 117578 |
+
"epoch": 19.12991452991453,
|
| 117579 |
+
"grad_norm": 0.20490753650665283,
|
| 117580 |
+
"learning_rate": 2.53650831782834e-07,
|
| 117581 |
+
"loss": 0.6516,
|
| 117582 |
+
"step": 16796
|
| 117583 |
+
},
|
| 117584 |
+
{
|
| 117585 |
+
"epoch": 19.13105413105413,
|
| 117586 |
+
"grad_norm": 0.2096950113773346,
|
| 117587 |
+
"learning_rate": 2.529892457661165e-07,
|
| 117588 |
+
"loss": 0.6214,
|
| 117589 |
+
"step": 16797
|
| 117590 |
+
},
|
| 117591 |
+
{
|
| 117592 |
+
"epoch": 19.13219373219373,
|
| 117593 |
+
"grad_norm": 0.20809367299079895,
|
| 117594 |
+
"learning_rate": 2.5232851928113644e-07,
|
| 117595 |
+
"loss": 0.6129,
|
| 117596 |
+
"step": 16798
|
| 117597 |
+
},
|
| 117598 |
+
{
|
| 117599 |
+
"epoch": 19.133333333333333,
|
| 117600 |
+
"grad_norm": 0.15231585502624512,
|
| 117601 |
+
"learning_rate": 2.516686523508449e-07,
|
| 117602 |
+
"loss": 0.6113,
|
| 117603 |
+
"step": 16799
|
| 117604 |
+
},
|
| 117605 |
+
{
|
| 117606 |
+
"epoch": 19.134472934472935,
|
| 117607 |
+
"grad_norm": 0.2246239334344864,
|
| 117608 |
+
"learning_rate": 2.510096449981569e-07,
|
| 117609 |
+
"loss": 0.4266,
|
| 117610 |
+
"step": 16800
|
| 117611 |
}
|
| 117612 |
],
|
| 117613 |
"logging_steps": 1,
|
|
|
|
| 117627 |
"attributes": {}
|
| 117628 |
}
|
| 117629 |
},
|
| 117630 |
+
"total_flos": 9.39308616865058e+19,
|
| 117631 |
"train_batch_size": 8,
|
| 117632 |
"trial_name": null,
|
| 117633 |
"trial_params": null
|