Upload checkpoint checkpoint-60000 (flat) - 2025-11-28T17:48:26.521020
Browse files- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +0 -0
- scheduler.pt +0 -0
- trainer_state.json +3519 -6
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 63025064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d08035c3b8586deb374b6b1ef3c85f06a2db3572edb94c50dce7fdcfd2373eb
|
| 3 |
size 63025064
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 34846738
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc1c9d1a67caaa33abb8c781b9be438896fd7b77bac2d34818d5e60b28e2cecc
|
| 3 |
size 34846738
|
rng_state.pth
CHANGED
|
Binary files a/rng_state.pth and b/rng_state.pth differ
|
|
|
scheduler.pt
CHANGED
|
Binary files a/scheduler.pt and b/scheduler.pt differ
|
|
|
trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "./whisper-translate-bn-or\\checkpoint-
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 10000,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -38580,6 +38580,3519 @@
|
|
| 38580 |
"learning_rate": 5.408288061361749e-06,
|
| 38581 |
"loss": 0.4237,
|
| 38582 |
"step": 55000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38583 |
}
|
| 38584 |
],
|
| 38585 |
"logging_steps": 10,
|
|
@@ -38599,7 +42112,7 @@
|
|
| 38599 |
"attributes": {}
|
| 38600 |
}
|
| 38601 |
},
|
| 38602 |
-
"total_flos": 3.
|
| 38603 |
"train_batch_size": 8,
|
| 38604 |
"trial_name": null,
|
| 38605 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 60000,
|
| 3 |
+
"best_metric": 0.4047969489431023,
|
| 4 |
+
"best_model_checkpoint": "./whisper-translate-bn-or\\checkpoint-60000",
|
| 5 |
+
"epoch": 6.009615866179196,
|
| 6 |
"eval_steps": 10000,
|
| 7 |
+
"global_step": 60000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 38580 |
"learning_rate": 5.408288061361749e-06,
|
| 38581 |
"loss": 0.4237,
|
| 38582 |
"step": 55000
|
| 38583 |
+
},
|
| 38584 |
+
{
|
| 38585 |
+
"epoch": 5.509841238042771,
|
| 38586 |
+
"grad_norm": 2.5870304107666016,
|
| 38587 |
+
"learning_rate": 5.401299259191697e-06,
|
| 38588 |
+
"loss": 0.3804,
|
| 38589 |
+
"step": 55010
|
| 38590 |
+
},
|
| 38591 |
+
{
|
| 38592 |
+
"epoch": 5.51084289076977,
|
| 38593 |
+
"grad_norm": 2.272921562194824,
|
| 38594 |
+
"learning_rate": 5.394314428621761e-06,
|
| 38595 |
+
"loss": 0.4343,
|
| 38596 |
+
"step": 55020
|
| 38597 |
+
},
|
| 38598 |
+
{
|
| 38599 |
+
"epoch": 5.511844543496769,
|
| 38600 |
+
"grad_norm": 1.8222514390945435,
|
| 38601 |
+
"learning_rate": 5.387333571067399e-06,
|
| 38602 |
+
"loss": 0.389,
|
| 38603 |
+
"step": 55030
|
| 38604 |
+
},
|
| 38605 |
+
{
|
| 38606 |
+
"epoch": 5.512846196223769,
|
| 38607 |
+
"grad_norm": 2.294215679168701,
|
| 38608 |
+
"learning_rate": 5.380356687943231e-06,
|
| 38609 |
+
"loss": 0.4157,
|
| 38610 |
+
"step": 55040
|
| 38611 |
+
},
|
| 38612 |
+
{
|
| 38613 |
+
"epoch": 5.5138478489507685,
|
| 38614 |
+
"grad_norm": 1.784379482269287,
|
| 38615 |
+
"learning_rate": 5.373383780663116e-06,
|
| 38616 |
+
"loss": 0.3233,
|
| 38617 |
+
"step": 55050
|
| 38618 |
+
},
|
| 38619 |
+
{
|
| 38620 |
+
"epoch": 5.5148495016777686,
|
| 38621 |
+
"grad_norm": 2.3985023498535156,
|
| 38622 |
+
"learning_rate": 5.366414850640084e-06,
|
| 38623 |
+
"loss": 0.423,
|
| 38624 |
+
"step": 55060
|
| 38625 |
+
},
|
| 38626 |
+
{
|
| 38627 |
+
"epoch": 5.515851154404768,
|
| 38628 |
+
"grad_norm": 1.9860508441925049,
|
| 38629 |
+
"learning_rate": 5.359449899286356e-06,
|
| 38630 |
+
"loss": 0.4267,
|
| 38631 |
+
"step": 55070
|
| 38632 |
+
},
|
| 38633 |
+
{
|
| 38634 |
+
"epoch": 5.516852807131768,
|
| 38635 |
+
"grad_norm": 10.242769241333008,
|
| 38636 |
+
"learning_rate": 5.352488928013355e-06,
|
| 38637 |
+
"loss": 0.421,
|
| 38638 |
+
"step": 55080
|
| 38639 |
+
},
|
| 38640 |
+
{
|
| 38641 |
+
"epoch": 5.517854459858767,
|
| 38642 |
+
"grad_norm": 2.2236850261688232,
|
| 38643 |
+
"learning_rate": 5.3455319382316935e-06,
|
| 38644 |
+
"loss": 0.467,
|
| 38645 |
+
"step": 55090
|
| 38646 |
+
},
|
| 38647 |
+
{
|
| 38648 |
+
"epoch": 5.518856112585766,
|
| 38649 |
+
"grad_norm": 2.9083995819091797,
|
| 38650 |
+
"learning_rate": 5.338578931351185e-06,
|
| 38651 |
+
"loss": 0.4175,
|
| 38652 |
+
"step": 55100
|
| 38653 |
+
},
|
| 38654 |
+
{
|
| 38655 |
+
"epoch": 5.519857765312766,
|
| 38656 |
+
"grad_norm": 2.7980551719665527,
|
| 38657 |
+
"learning_rate": 5.331629908780811e-06,
|
| 38658 |
+
"loss": 0.405,
|
| 38659 |
+
"step": 55110
|
| 38660 |
+
},
|
| 38661 |
+
{
|
| 38662 |
+
"epoch": 5.520859418039766,
|
| 38663 |
+
"grad_norm": 1.9399455785751343,
|
| 38664 |
+
"learning_rate": 5.324684871928781e-06,
|
| 38665 |
+
"loss": 0.3661,
|
| 38666 |
+
"step": 55120
|
| 38667 |
+
},
|
| 38668 |
+
{
|
| 38669 |
+
"epoch": 5.521861070766765,
|
| 38670 |
+
"grad_norm": 2.6033365726470947,
|
| 38671 |
+
"learning_rate": 5.3177438222024755e-06,
|
| 38672 |
+
"loss": 0.4528,
|
| 38673 |
+
"step": 55130
|
| 38674 |
+
},
|
| 38675 |
+
{
|
| 38676 |
+
"epoch": 5.522862723493764,
|
| 38677 |
+
"grad_norm": 2.736616849899292,
|
| 38678 |
+
"learning_rate": 5.3108067610084715e-06,
|
| 38679 |
+
"loss": 0.4238,
|
| 38680 |
+
"step": 55140
|
| 38681 |
+
},
|
| 38682 |
+
{
|
| 38683 |
+
"epoch": 5.523864376220764,
|
| 38684 |
+
"grad_norm": 2.470409870147705,
|
| 38685 |
+
"learning_rate": 5.303873689752531e-06,
|
| 38686 |
+
"loss": 0.427,
|
| 38687 |
+
"step": 55150
|
| 38688 |
+
},
|
| 38689 |
+
{
|
| 38690 |
+
"epoch": 5.524866028947764,
|
| 38691 |
+
"grad_norm": 2.484952926635742,
|
| 38692 |
+
"learning_rate": 5.296944609839624e-06,
|
| 38693 |
+
"loss": 0.3858,
|
| 38694 |
+
"step": 55160
|
| 38695 |
+
},
|
| 38696 |
+
{
|
| 38697 |
+
"epoch": 5.525867681674764,
|
| 38698 |
+
"grad_norm": 2.44401478767395,
|
| 38699 |
+
"learning_rate": 5.290019522673892e-06,
|
| 38700 |
+
"loss": 0.4209,
|
| 38701 |
+
"step": 55170
|
| 38702 |
+
},
|
| 38703 |
+
{
|
| 38704 |
+
"epoch": 5.526869334401763,
|
| 38705 |
+
"grad_norm": 1.9219155311584473,
|
| 38706 |
+
"learning_rate": 5.283098429658684e-06,
|
| 38707 |
+
"loss": 0.4619,
|
| 38708 |
+
"step": 55180
|
| 38709 |
+
},
|
| 38710 |
+
{
|
| 38711 |
+
"epoch": 5.527870987128763,
|
| 38712 |
+
"grad_norm": 2.730436325073242,
|
| 38713 |
+
"learning_rate": 5.276181332196525e-06,
|
| 38714 |
+
"loss": 0.3948,
|
| 38715 |
+
"step": 55190
|
| 38716 |
+
},
|
| 38717 |
+
{
|
| 38718 |
+
"epoch": 5.528872639855762,
|
| 38719 |
+
"grad_norm": 2.0215957164764404,
|
| 38720 |
+
"learning_rate": 5.269268231689145e-06,
|
| 38721 |
+
"loss": 0.4291,
|
| 38722 |
+
"step": 55200
|
| 38723 |
+
},
|
| 38724 |
+
{
|
| 38725 |
+
"epoch": 5.529874292582761,
|
| 38726 |
+
"grad_norm": 2.180464744567871,
|
| 38727 |
+
"learning_rate": 5.262359129537456e-06,
|
| 38728 |
+
"loss": 0.42,
|
| 38729 |
+
"step": 55210
|
| 38730 |
+
},
|
| 38731 |
+
{
|
| 38732 |
+
"epoch": 5.530875945309761,
|
| 38733 |
+
"grad_norm": 2.3438057899475098,
|
| 38734 |
+
"learning_rate": 5.2554540271415546e-06,
|
| 38735 |
+
"loss": 0.4484,
|
| 38736 |
+
"step": 55220
|
| 38737 |
+
},
|
| 38738 |
+
{
|
| 38739 |
+
"epoch": 5.53187759803676,
|
| 38740 |
+
"grad_norm": 2.27927827835083,
|
| 38741 |
+
"learning_rate": 5.248552925900738e-06,
|
| 38742 |
+
"loss": 0.3899,
|
| 38743 |
+
"step": 55230
|
| 38744 |
+
},
|
| 38745 |
+
{
|
| 38746 |
+
"epoch": 5.53287925076376,
|
| 38747 |
+
"grad_norm": 2.1857833862304688,
|
| 38748 |
+
"learning_rate": 5.241655827213487e-06,
|
| 38749 |
+
"loss": 0.3472,
|
| 38750 |
+
"step": 55240
|
| 38751 |
+
},
|
| 38752 |
+
{
|
| 38753 |
+
"epoch": 5.5338809034907595,
|
| 38754 |
+
"grad_norm": 2.251704216003418,
|
| 38755 |
+
"learning_rate": 5.234762732477469e-06,
|
| 38756 |
+
"loss": 0.4025,
|
| 38757 |
+
"step": 55250
|
| 38758 |
+
},
|
| 38759 |
+
{
|
| 38760 |
+
"epoch": 5.5348825562177595,
|
| 38761 |
+
"grad_norm": 2.3819692134857178,
|
| 38762 |
+
"learning_rate": 5.2278736430895455e-06,
|
| 38763 |
+
"loss": 0.4069,
|
| 38764 |
+
"step": 55260
|
| 38765 |
+
},
|
| 38766 |
+
{
|
| 38767 |
+
"epoch": 5.535884208944759,
|
| 38768 |
+
"grad_norm": 2.3232901096343994,
|
| 38769 |
+
"learning_rate": 5.2209885604457634e-06,
|
| 38770 |
+
"loss": 0.3762,
|
| 38771 |
+
"step": 55270
|
| 38772 |
+
},
|
| 38773 |
+
{
|
| 38774 |
+
"epoch": 5.536885861671759,
|
| 38775 |
+
"grad_norm": 2.466538667678833,
|
| 38776 |
+
"learning_rate": 5.214107485941355e-06,
|
| 38777 |
+
"loss": 0.3595,
|
| 38778 |
+
"step": 55280
|
| 38779 |
+
},
|
| 38780 |
+
{
|
| 38781 |
+
"epoch": 5.537887514398758,
|
| 38782 |
+
"grad_norm": 2.3276658058166504,
|
| 38783 |
+
"learning_rate": 5.207230420970746e-06,
|
| 38784 |
+
"loss": 0.3998,
|
| 38785 |
+
"step": 55290
|
| 38786 |
+
},
|
| 38787 |
+
{
|
| 38788 |
+
"epoch": 5.538889167125758,
|
| 38789 |
+
"grad_norm": 2.1215779781341553,
|
| 38790 |
+
"learning_rate": 5.200357366927547e-06,
|
| 38791 |
+
"loss": 0.3797,
|
| 38792 |
+
"step": 55300
|
| 38793 |
+
},
|
| 38794 |
+
{
|
| 38795 |
+
"epoch": 5.539890819852757,
|
| 38796 |
+
"grad_norm": 2.2454323768615723,
|
| 38797 |
+
"learning_rate": 5.193488325204551e-06,
|
| 38798 |
+
"loss": 0.415,
|
| 38799 |
+
"step": 55310
|
| 38800 |
+
},
|
| 38801 |
+
{
|
| 38802 |
+
"epoch": 5.540892472579756,
|
| 38803 |
+
"grad_norm": 1.9308357238769531,
|
| 38804 |
+
"learning_rate": 5.186623297193749e-06,
|
| 38805 |
+
"loss": 0.4037,
|
| 38806 |
+
"step": 55320
|
| 38807 |
+
},
|
| 38808 |
+
{
|
| 38809 |
+
"epoch": 5.541894125306756,
|
| 38810 |
+
"grad_norm": 2.1115362644195557,
|
| 38811 |
+
"learning_rate": 5.179762284286307e-06,
|
| 38812 |
+
"loss": 0.4279,
|
| 38813 |
+
"step": 55330
|
| 38814 |
+
},
|
| 38815 |
+
{
|
| 38816 |
+
"epoch": 5.542895778033755,
|
| 38817 |
+
"grad_norm": 2.275033712387085,
|
| 38818 |
+
"learning_rate": 5.172905287872587e-06,
|
| 38819 |
+
"loss": 0.4195,
|
| 38820 |
+
"step": 55340
|
| 38821 |
+
},
|
| 38822 |
+
{
|
| 38823 |
+
"epoch": 5.543897430760755,
|
| 38824 |
+
"grad_norm": 2.168248176574707,
|
| 38825 |
+
"learning_rate": 5.1660523093421286e-06,
|
| 38826 |
+
"loss": 0.4031,
|
| 38827 |
+
"step": 55350
|
| 38828 |
+
},
|
| 38829 |
+
{
|
| 38830 |
+
"epoch": 5.5448990834877545,
|
| 38831 |
+
"grad_norm": 2.5694780349731445,
|
| 38832 |
+
"learning_rate": 5.159203350083661e-06,
|
| 38833 |
+
"loss": 0.3815,
|
| 38834 |
+
"step": 55360
|
| 38835 |
+
},
|
| 38836 |
+
{
|
| 38837 |
+
"epoch": 5.5459007362147545,
|
| 38838 |
+
"grad_norm": 2.2378110885620117,
|
| 38839 |
+
"learning_rate": 5.152358411485103e-06,
|
| 38840 |
+
"loss": 0.4889,
|
| 38841 |
+
"step": 55370
|
| 38842 |
+
},
|
| 38843 |
+
{
|
| 38844 |
+
"epoch": 5.546902388941754,
|
| 38845 |
+
"grad_norm": 1.9047712087631226,
|
| 38846 |
+
"learning_rate": 5.145517494933549e-06,
|
| 38847 |
+
"loss": 0.3602,
|
| 38848 |
+
"step": 55380
|
| 38849 |
+
},
|
| 38850 |
+
{
|
| 38851 |
+
"epoch": 5.547904041668754,
|
| 38852 |
+
"grad_norm": 2.280135154724121,
|
| 38853 |
+
"learning_rate": 5.138680601815288e-06,
|
| 38854 |
+
"loss": 0.4001,
|
| 38855 |
+
"step": 55390
|
| 38856 |
+
},
|
| 38857 |
+
{
|
| 38858 |
+
"epoch": 5.548905694395753,
|
| 38859 |
+
"grad_norm": 1.936503529548645,
|
| 38860 |
+
"learning_rate": 5.1318477335157905e-06,
|
| 38861 |
+
"loss": 0.3838,
|
| 38862 |
+
"step": 55400
|
| 38863 |
+
},
|
| 38864 |
+
{
|
| 38865 |
+
"epoch": 5.549907347122753,
|
| 38866 |
+
"grad_norm": 2.357356548309326,
|
| 38867 |
+
"learning_rate": 5.125018891419708e-06,
|
| 38868 |
+
"loss": 0.436,
|
| 38869 |
+
"step": 55410
|
| 38870 |
+
},
|
| 38871 |
+
{
|
| 38872 |
+
"epoch": 5.550908999849752,
|
| 38873 |
+
"grad_norm": 1.6730624437332153,
|
| 38874 |
+
"learning_rate": 5.1181940769108786e-06,
|
| 38875 |
+
"loss": 0.4016,
|
| 38876 |
+
"step": 55420
|
| 38877 |
+
},
|
| 38878 |
+
{
|
| 38879 |
+
"epoch": 5.551910652576751,
|
| 38880 |
+
"grad_norm": 2.563577890396118,
|
| 38881 |
+
"learning_rate": 5.111373291372326e-06,
|
| 38882 |
+
"loss": 0.4042,
|
| 38883 |
+
"step": 55430
|
| 38884 |
+
},
|
| 38885 |
+
{
|
| 38886 |
+
"epoch": 5.552912305303751,
|
| 38887 |
+
"grad_norm": 2.3064987659454346,
|
| 38888 |
+
"learning_rate": 5.104556536186256e-06,
|
| 38889 |
+
"loss": 0.5052,
|
| 38890 |
+
"step": 55440
|
| 38891 |
+
},
|
| 38892 |
+
{
|
| 38893 |
+
"epoch": 5.55391395803075,
|
| 38894 |
+
"grad_norm": 2.0622353553771973,
|
| 38895 |
+
"learning_rate": 5.0977438127340485e-06,
|
| 38896 |
+
"loss": 0.4102,
|
| 38897 |
+
"step": 55450
|
| 38898 |
+
},
|
| 38899 |
+
{
|
| 38900 |
+
"epoch": 5.55491561075775,
|
| 38901 |
+
"grad_norm": 2.744607448577881,
|
| 38902 |
+
"learning_rate": 5.090935122396295e-06,
|
| 38903 |
+
"loss": 0.3889,
|
| 38904 |
+
"step": 55460
|
| 38905 |
+
},
|
| 38906 |
+
{
|
| 38907 |
+
"epoch": 5.55591726348475,
|
| 38908 |
+
"grad_norm": 2.537193536758423,
|
| 38909 |
+
"learning_rate": 5.084130466552733e-06,
|
| 38910 |
+
"loss": 0.3836,
|
| 38911 |
+
"step": 55470
|
| 38912 |
+
},
|
| 38913 |
+
{
|
| 38914 |
+
"epoch": 5.55691891621175,
|
| 38915 |
+
"grad_norm": 1.953226923942566,
|
| 38916 |
+
"learning_rate": 5.077329846582304e-06,
|
| 38917 |
+
"loss": 0.387,
|
| 38918 |
+
"step": 55480
|
| 38919 |
+
},
|
| 38920 |
+
{
|
| 38921 |
+
"epoch": 5.557920568938749,
|
| 38922 |
+
"grad_norm": 1.9167578220367432,
|
| 38923 |
+
"learning_rate": 5.070533263863131e-06,
|
| 38924 |
+
"loss": 0.4026,
|
| 38925 |
+
"step": 55490
|
| 38926 |
+
},
|
| 38927 |
+
{
|
| 38928 |
+
"epoch": 5.558922221665749,
|
| 38929 |
+
"grad_norm": 2.7961089611053467,
|
| 38930 |
+
"learning_rate": 5.063740719772505e-06,
|
| 38931 |
+
"loss": 0.4772,
|
| 38932 |
+
"step": 55500
|
| 38933 |
+
},
|
| 38934 |
+
{
|
| 38935 |
+
"epoch": 5.559923874392748,
|
| 38936 |
+
"grad_norm": 2.758035659790039,
|
| 38937 |
+
"learning_rate": 5.056952215686919e-06,
|
| 38938 |
+
"loss": 0.3933,
|
| 38939 |
+
"step": 55510
|
| 38940 |
+
},
|
| 38941 |
+
{
|
| 38942 |
+
"epoch": 5.560925527119748,
|
| 38943 |
+
"grad_norm": 1.7131747007369995,
|
| 38944 |
+
"learning_rate": 5.0501677529820266e-06,
|
| 38945 |
+
"loss": 0.4455,
|
| 38946 |
+
"step": 55520
|
| 38947 |
+
},
|
| 38948 |
+
{
|
| 38949 |
+
"epoch": 5.561927179846747,
|
| 38950 |
+
"grad_norm": 3.062673568725586,
|
| 38951 |
+
"learning_rate": 5.0433873330326925e-06,
|
| 38952 |
+
"loss": 0.4611,
|
| 38953 |
+
"step": 55530
|
| 38954 |
+
},
|
| 38955 |
+
{
|
| 38956 |
+
"epoch": 5.562928832573746,
|
| 38957 |
+
"grad_norm": 2.5144660472869873,
|
| 38958 |
+
"learning_rate": 5.03661095721292e-06,
|
| 38959 |
+
"loss": 0.3533,
|
| 38960 |
+
"step": 55540
|
| 38961 |
+
},
|
| 38962 |
+
{
|
| 38963 |
+
"epoch": 5.563930485300746,
|
| 38964 |
+
"grad_norm": 2.3329527378082275,
|
| 38965 |
+
"learning_rate": 5.029838626895924e-06,
|
| 38966 |
+
"loss": 0.3626,
|
| 38967 |
+
"step": 55550
|
| 38968 |
+
},
|
| 38969 |
+
{
|
| 38970 |
+
"epoch": 5.5649321380277454,
|
| 38971 |
+
"grad_norm": 2.2478086948394775,
|
| 38972 |
+
"learning_rate": 5.023070343454087e-06,
|
| 38973 |
+
"loss": 0.3979,
|
| 38974 |
+
"step": 55560
|
| 38975 |
+
},
|
| 38976 |
+
{
|
| 38977 |
+
"epoch": 5.5659337907547455,
|
| 38978 |
+
"grad_norm": 2.0729291439056396,
|
| 38979 |
+
"learning_rate": 5.016306108258989e-06,
|
| 38980 |
+
"loss": 0.3791,
|
| 38981 |
+
"step": 55570
|
| 38982 |
+
},
|
| 38983 |
+
{
|
| 38984 |
+
"epoch": 5.566935443481745,
|
| 38985 |
+
"grad_norm": 1.8252699375152588,
|
| 38986 |
+
"learning_rate": 5.0095459226813605e-06,
|
| 38987 |
+
"loss": 0.4054,
|
| 38988 |
+
"step": 55580
|
| 38989 |
+
},
|
| 38990 |
+
{
|
| 38991 |
+
"epoch": 5.567937096208745,
|
| 38992 |
+
"grad_norm": 2.431644916534424,
|
| 38993 |
+
"learning_rate": 5.002789788091125e-06,
|
| 38994 |
+
"loss": 0.4084,
|
| 38995 |
+
"step": 55590
|
| 38996 |
+
},
|
| 38997 |
+
{
|
| 38998 |
+
"epoch": 5.568938748935744,
|
| 38999 |
+
"grad_norm": 2.4573440551757812,
|
| 39000 |
+
"learning_rate": 4.9960377058574065e-06,
|
| 39001 |
+
"loss": 0.3977,
|
| 39002 |
+
"step": 55600
|
| 39003 |
+
},
|
| 39004 |
+
{
|
| 39005 |
+
"epoch": 5.569940401662744,
|
| 39006 |
+
"grad_norm": 2.4722249507904053,
|
| 39007 |
+
"learning_rate": 4.989289677348469e-06,
|
| 39008 |
+
"loss": 0.399,
|
| 39009 |
+
"step": 55610
|
| 39010 |
+
},
|
| 39011 |
+
{
|
| 39012 |
+
"epoch": 5.570942054389743,
|
| 39013 |
+
"grad_norm": 2.153240442276001,
|
| 39014 |
+
"learning_rate": 4.982545703931779e-06,
|
| 39015 |
+
"loss": 0.3903,
|
| 39016 |
+
"step": 55620
|
| 39017 |
+
},
|
| 39018 |
+
{
|
| 39019 |
+
"epoch": 5.571943707116743,
|
| 39020 |
+
"grad_norm": 2.061044216156006,
|
| 39021 |
+
"learning_rate": 4.975805786973972e-06,
|
| 39022 |
+
"loss": 0.4102,
|
| 39023 |
+
"step": 55630
|
| 39024 |
+
},
|
| 39025 |
+
{
|
| 39026 |
+
"epoch": 5.572945359843742,
|
| 39027 |
+
"grad_norm": 2.03114652633667,
|
| 39028 |
+
"learning_rate": 4.9690699278408855e-06,
|
| 39029 |
+
"loss": 0.4143,
|
| 39030 |
+
"step": 55640
|
| 39031 |
+
},
|
| 39032 |
+
{
|
| 39033 |
+
"epoch": 5.573947012570741,
|
| 39034 |
+
"grad_norm": 1.9882391691207886,
|
| 39035 |
+
"learning_rate": 4.962338127897495e-06,
|
| 39036 |
+
"loss": 0.4202,
|
| 39037 |
+
"step": 55650
|
| 39038 |
+
},
|
| 39039 |
+
{
|
| 39040 |
+
"epoch": 5.574948665297741,
|
| 39041 |
+
"grad_norm": 2.1374762058258057,
|
| 39042 |
+
"learning_rate": 4.955610388507972e-06,
|
| 39043 |
+
"loss": 0.4778,
|
| 39044 |
+
"step": 55660
|
| 39045 |
+
},
|
| 39046 |
+
{
|
| 39047 |
+
"epoch": 5.5759503180247405,
|
| 39048 |
+
"grad_norm": 2.1268529891967773,
|
| 39049 |
+
"learning_rate": 4.94888671103568e-06,
|
| 39050 |
+
"loss": 0.4456,
|
| 39051 |
+
"step": 55670
|
| 39052 |
+
},
|
| 39053 |
+
{
|
| 39054 |
+
"epoch": 5.5769519707517405,
|
| 39055 |
+
"grad_norm": 2.9118077754974365,
|
| 39056 |
+
"learning_rate": 4.9421670968431476e-06,
|
| 39057 |
+
"loss": 0.4545,
|
| 39058 |
+
"step": 55680
|
| 39059 |
+
},
|
| 39060 |
+
{
|
| 39061 |
+
"epoch": 5.57795362347874,
|
| 39062 |
+
"grad_norm": 2.536949396133423,
|
| 39063 |
+
"learning_rate": 4.935451547292064e-06,
|
| 39064 |
+
"loss": 0.3569,
|
| 39065 |
+
"step": 55690
|
| 39066 |
+
},
|
| 39067 |
+
{
|
| 39068 |
+
"epoch": 5.57895527620574,
|
| 39069 |
+
"grad_norm": 1.8817559480667114,
|
| 39070 |
+
"learning_rate": 4.928740063743309e-06,
|
| 39071 |
+
"loss": 0.4182,
|
| 39072 |
+
"step": 55700
|
| 39073 |
+
},
|
| 39074 |
+
{
|
| 39075 |
+
"epoch": 5.579956928932739,
|
| 39076 |
+
"grad_norm": 1.9330812692642212,
|
| 39077 |
+
"learning_rate": 4.92203264755696e-06,
|
| 39078 |
+
"loss": 0.4411,
|
| 39079 |
+
"step": 55710
|
| 39080 |
+
},
|
| 39081 |
+
{
|
| 39082 |
+
"epoch": 5.580958581659739,
|
| 39083 |
+
"grad_norm": 2.593085289001465,
|
| 39084 |
+
"learning_rate": 4.915329300092225e-06,
|
| 39085 |
+
"loss": 0.4232,
|
| 39086 |
+
"step": 55720
|
| 39087 |
+
},
|
| 39088 |
+
{
|
| 39089 |
+
"epoch": 5.581960234386738,
|
| 39090 |
+
"grad_norm": 2.357499837875366,
|
| 39091 |
+
"learning_rate": 4.908630022707514e-06,
|
| 39092 |
+
"loss": 0.4191,
|
| 39093 |
+
"step": 55730
|
| 39094 |
+
},
|
| 39095 |
+
{
|
| 39096 |
+
"epoch": 5.582961887113738,
|
| 39097 |
+
"grad_norm": 2.4547533988952637,
|
| 39098 |
+
"learning_rate": 4.901934816760423e-06,
|
| 39099 |
+
"loss": 0.3963,
|
| 39100 |
+
"step": 55740
|
| 39101 |
+
},
|
| 39102 |
+
{
|
| 39103 |
+
"epoch": 5.583963539840737,
|
| 39104 |
+
"grad_norm": 2.490044116973877,
|
| 39105 |
+
"learning_rate": 4.895243683607708e-06,
|
| 39106 |
+
"loss": 0.3901,
|
| 39107 |
+
"step": 55750
|
| 39108 |
+
},
|
| 39109 |
+
{
|
| 39110 |
+
"epoch": 5.584965192567736,
|
| 39111 |
+
"grad_norm": 1.9551465511322021,
|
| 39112 |
+
"learning_rate": 4.8885566246052926e-06,
|
| 39113 |
+
"loss": 0.4157,
|
| 39114 |
+
"step": 55760
|
| 39115 |
+
},
|
| 39116 |
+
{
|
| 39117 |
+
"epoch": 5.585966845294736,
|
| 39118 |
+
"grad_norm": 2.72518253326416,
|
| 39119 |
+
"learning_rate": 4.881873641108273e-06,
|
| 39120 |
+
"loss": 0.4032,
|
| 39121 |
+
"step": 55770
|
| 39122 |
+
},
|
| 39123 |
+
{
|
| 39124 |
+
"epoch": 5.586968498021736,
|
| 39125 |
+
"grad_norm": 2.4889354705810547,
|
| 39126 |
+
"learning_rate": 4.875194734470953e-06,
|
| 39127 |
+
"loss": 0.3652,
|
| 39128 |
+
"step": 55780
|
| 39129 |
+
},
|
| 39130 |
+
{
|
| 39131 |
+
"epoch": 5.587970150748736,
|
| 39132 |
+
"grad_norm": 1.6999070644378662,
|
| 39133 |
+
"learning_rate": 4.868519906046781e-06,
|
| 39134 |
+
"loss": 0.3672,
|
| 39135 |
+
"step": 55790
|
| 39136 |
+
},
|
| 39137 |
+
{
|
| 39138 |
+
"epoch": 5.588971803475735,
|
| 39139 |
+
"grad_norm": 2.633692979812622,
|
| 39140 |
+
"learning_rate": 4.861849157188367e-06,
|
| 39141 |
+
"loss": 0.4212,
|
| 39142 |
+
"step": 55800
|
| 39143 |
+
},
|
| 39144 |
+
{
|
| 39145 |
+
"epoch": 5.589973456202735,
|
| 39146 |
+
"grad_norm": 2.0171597003936768,
|
| 39147 |
+
"learning_rate": 4.855182489247531e-06,
|
| 39148 |
+
"loss": 0.4314,
|
| 39149 |
+
"step": 55810
|
| 39150 |
+
},
|
| 39151 |
+
{
|
| 39152 |
+
"epoch": 5.590975108929734,
|
| 39153 |
+
"grad_norm": 1.6404458284378052,
|
| 39154 |
+
"learning_rate": 4.848519903575249e-06,
|
| 39155 |
+
"loss": 0.3559,
|
| 39156 |
+
"step": 55820
|
| 39157 |
+
},
|
| 39158 |
+
{
|
| 39159 |
+
"epoch": 5.591976761656734,
|
| 39160 |
+
"grad_norm": 2.118978500366211,
|
| 39161 |
+
"learning_rate": 4.841861401521652e-06,
|
| 39162 |
+
"loss": 0.3897,
|
| 39163 |
+
"step": 55830
|
| 39164 |
+
},
|
| 39165 |
+
{
|
| 39166 |
+
"epoch": 5.592978414383733,
|
| 39167 |
+
"grad_norm": 2.1611812114715576,
|
| 39168 |
+
"learning_rate": 4.835206984436063e-06,
|
| 39169 |
+
"loss": 0.384,
|
| 39170 |
+
"step": 55840
|
| 39171 |
+
},
|
| 39172 |
+
{
|
| 39173 |
+
"epoch": 5.593980067110733,
|
| 39174 |
+
"grad_norm": 2.480172634124756,
|
| 39175 |
+
"learning_rate": 4.828556653666985e-06,
|
| 39176 |
+
"loss": 0.4026,
|
| 39177 |
+
"step": 55850
|
| 39178 |
+
},
|
| 39179 |
+
{
|
| 39180 |
+
"epoch": 5.594981719837732,
|
| 39181 |
+
"grad_norm": 2.1975290775299072,
|
| 39182 |
+
"learning_rate": 4.821910410562081e-06,
|
| 39183 |
+
"loss": 0.4318,
|
| 39184 |
+
"step": 55860
|
| 39185 |
+
},
|
| 39186 |
+
{
|
| 39187 |
+
"epoch": 5.595983372564731,
|
| 39188 |
+
"grad_norm": 2.0429039001464844,
|
| 39189 |
+
"learning_rate": 4.8152682564681665e-06,
|
| 39190 |
+
"loss": 0.386,
|
| 39191 |
+
"step": 55870
|
| 39192 |
+
},
|
| 39193 |
+
{
|
| 39194 |
+
"epoch": 5.5969850252917315,
|
| 39195 |
+
"grad_norm": 2.011446237564087,
|
| 39196 |
+
"learning_rate": 4.80863019273127e-06,
|
| 39197 |
+
"loss": 0.3534,
|
| 39198 |
+
"step": 55880
|
| 39199 |
+
},
|
| 39200 |
+
{
|
| 39201 |
+
"epoch": 5.597986678018731,
|
| 39202 |
+
"grad_norm": 2.308497428894043,
|
| 39203 |
+
"learning_rate": 4.801996220696567e-06,
|
| 39204 |
+
"loss": 0.4361,
|
| 39205 |
+
"step": 55890
|
| 39206 |
+
},
|
| 39207 |
+
{
|
| 39208 |
+
"epoch": 5.598988330745731,
|
| 39209 |
+
"grad_norm": 2.9305574893951416,
|
| 39210 |
+
"learning_rate": 4.795366341708394e-06,
|
| 39211 |
+
"loss": 0.3997,
|
| 39212 |
+
"step": 55900
|
| 39213 |
+
},
|
| 39214 |
+
{
|
| 39215 |
+
"epoch": 5.59998998347273,
|
| 39216 |
+
"grad_norm": 2.526543378829956,
|
| 39217 |
+
"learning_rate": 4.788740557110266e-06,
|
| 39218 |
+
"loss": 0.4091,
|
| 39219 |
+
"step": 55910
|
| 39220 |
+
},
|
| 39221 |
+
{
|
| 39222 |
+
"epoch": 5.60099163619973,
|
| 39223 |
+
"grad_norm": 2.7211554050445557,
|
| 39224 |
+
"learning_rate": 4.782118868244892e-06,
|
| 39225 |
+
"loss": 0.3944,
|
| 39226 |
+
"step": 55920
|
| 39227 |
+
},
|
| 39228 |
+
{
|
| 39229 |
+
"epoch": 5.601993288926729,
|
| 39230 |
+
"grad_norm": 2.3493165969848633,
|
| 39231 |
+
"learning_rate": 4.775501276454128e-06,
|
| 39232 |
+
"loss": 0.4719,
|
| 39233 |
+
"step": 55930
|
| 39234 |
+
},
|
| 39235 |
+
{
|
| 39236 |
+
"epoch": 5.602994941653729,
|
| 39237 |
+
"grad_norm": 1.6952441930770874,
|
| 39238 |
+
"learning_rate": 4.768887783078985e-06,
|
| 39239 |
+
"loss": 0.3397,
|
| 39240 |
+
"step": 55940
|
| 39241 |
+
},
|
| 39242 |
+
{
|
| 39243 |
+
"epoch": 5.603996594380728,
|
| 39244 |
+
"grad_norm": 3.4598031044006348,
|
| 39245 |
+
"learning_rate": 4.762278389459676e-06,
|
| 39246 |
+
"loss": 0.4733,
|
| 39247 |
+
"step": 55950
|
| 39248 |
+
},
|
| 39249 |
+
{
|
| 39250 |
+
"epoch": 5.604998247107728,
|
| 39251 |
+
"grad_norm": 1.940590262413025,
|
| 39252 |
+
"learning_rate": 4.75567309693557e-06,
|
| 39253 |
+
"loss": 0.4457,
|
| 39254 |
+
"step": 55960
|
| 39255 |
+
},
|
| 39256 |
+
{
|
| 39257 |
+
"epoch": 5.605999899834727,
|
| 39258 |
+
"grad_norm": 2.2044217586517334,
|
| 39259 |
+
"learning_rate": 4.749071906845199e-06,
|
| 39260 |
+
"loss": 0.4138,
|
| 39261 |
+
"step": 55970
|
| 39262 |
+
},
|
| 39263 |
+
{
|
| 39264 |
+
"epoch": 5.6070015525617265,
|
| 39265 |
+
"grad_norm": 2.072658061981201,
|
| 39266 |
+
"learning_rate": 4.742474820526269e-06,
|
| 39267 |
+
"loss": 0.4014,
|
| 39268 |
+
"step": 55980
|
| 39269 |
+
},
|
| 39270 |
+
{
|
| 39271 |
+
"epoch": 5.6080032052887265,
|
| 39272 |
+
"grad_norm": 2.6203811168670654,
|
| 39273 |
+
"learning_rate": 4.735881839315653e-06,
|
| 39274 |
+
"loss": 0.3985,
|
| 39275 |
+
"step": 55990
|
| 39276 |
+
},
|
| 39277 |
+
{
|
| 39278 |
+
"epoch": 5.609004858015726,
|
| 39279 |
+
"grad_norm": 2.3299992084503174,
|
| 39280 |
+
"learning_rate": 4.729292964549401e-06,
|
| 39281 |
+
"loss": 0.4411,
|
| 39282 |
+
"step": 56000
|
| 39283 |
+
},
|
| 39284 |
+
{
|
| 39285 |
+
"epoch": 5.610006510742726,
|
| 39286 |
+
"grad_norm": 2.6873273849487305,
|
| 39287 |
+
"learning_rate": 4.722708197562703e-06,
|
| 39288 |
+
"loss": 0.4068,
|
| 39289 |
+
"step": 56010
|
| 39290 |
+
},
|
| 39291 |
+
{
|
| 39292 |
+
"epoch": 5.611008163469725,
|
| 39293 |
+
"grad_norm": 2.430983066558838,
|
| 39294 |
+
"learning_rate": 4.716127539689955e-06,
|
| 39295 |
+
"loss": 0.3872,
|
| 39296 |
+
"step": 56020
|
| 39297 |
+
},
|
| 39298 |
+
{
|
| 39299 |
+
"epoch": 5.612009816196725,
|
| 39300 |
+
"grad_norm": 2.3532302379608154,
|
| 39301 |
+
"learning_rate": 4.709550992264697e-06,
|
| 39302 |
+
"loss": 0.4324,
|
| 39303 |
+
"step": 56030
|
| 39304 |
+
},
|
| 39305 |
+
{
|
| 39306 |
+
"epoch": 5.613011468923724,
|
| 39307 |
+
"grad_norm": 2.2957348823547363,
|
| 39308 |
+
"learning_rate": 4.702978556619636e-06,
|
| 39309 |
+
"loss": 0.4222,
|
| 39310 |
+
"step": 56040
|
| 39311 |
+
},
|
| 39312 |
+
{
|
| 39313 |
+
"epoch": 5.614013121650724,
|
| 39314 |
+
"grad_norm": 2.9766414165496826,
|
| 39315 |
+
"learning_rate": 4.696410234086657e-06,
|
| 39316 |
+
"loss": 0.4234,
|
| 39317 |
+
"step": 56050
|
| 39318 |
+
},
|
| 39319 |
+
{
|
| 39320 |
+
"epoch": 5.615014774377723,
|
| 39321 |
+
"grad_norm": 2.1183254718780518,
|
| 39322 |
+
"learning_rate": 4.689846025996799e-06,
|
| 39323 |
+
"loss": 0.3975,
|
| 39324 |
+
"step": 56060
|
| 39325 |
+
},
|
| 39326 |
+
{
|
| 39327 |
+
"epoch": 5.616016427104723,
|
| 39328 |
+
"grad_norm": 2.4658279418945312,
|
| 39329 |
+
"learning_rate": 4.683285933680279e-06,
|
| 39330 |
+
"loss": 0.3921,
|
| 39331 |
+
"step": 56070
|
| 39332 |
+
},
|
| 39333 |
+
{
|
| 39334 |
+
"epoch": 5.617018079831722,
|
| 39335 |
+
"grad_norm": 2.4801604747772217,
|
| 39336 |
+
"learning_rate": 4.676729958466466e-06,
|
| 39337 |
+
"loss": 0.3844,
|
| 39338 |
+
"step": 56080
|
| 39339 |
+
},
|
| 39340 |
+
{
|
| 39341 |
+
"epoch": 5.618019732558722,
|
| 39342 |
+
"grad_norm": 1.9644900560379028,
|
| 39343 |
+
"learning_rate": 4.670178101683911e-06,
|
| 39344 |
+
"loss": 0.42,
|
| 39345 |
+
"step": 56090
|
| 39346 |
+
},
|
| 39347 |
+
{
|
| 39348 |
+
"epoch": 5.619021385285722,
|
| 39349 |
+
"grad_norm": 2.1749885082244873,
|
| 39350 |
+
"learning_rate": 4.663630364660318e-06,
|
| 39351 |
+
"loss": 0.3942,
|
| 39352 |
+
"step": 56100
|
| 39353 |
+
},
|
| 39354 |
+
{
|
| 39355 |
+
"epoch": 5.620023038012721,
|
| 39356 |
+
"grad_norm": 2.22514271736145,
|
| 39357 |
+
"learning_rate": 4.657086748722561e-06,
|
| 39358 |
+
"loss": 0.3848,
|
| 39359 |
+
"step": 56110
|
| 39360 |
+
},
|
| 39361 |
+
{
|
| 39362 |
+
"epoch": 5.621024690739721,
|
| 39363 |
+
"grad_norm": 2.3103256225585938,
|
| 39364 |
+
"learning_rate": 4.6505472551966774e-06,
|
| 39365 |
+
"loss": 0.4226,
|
| 39366 |
+
"step": 56120
|
| 39367 |
+
},
|
| 39368 |
+
{
|
| 39369 |
+
"epoch": 5.62202634346672,
|
| 39370 |
+
"grad_norm": 2.485448122024536,
|
| 39371 |
+
"learning_rate": 4.64401188540787e-06,
|
| 39372 |
+
"loss": 0.4321,
|
| 39373 |
+
"step": 56130
|
| 39374 |
+
},
|
| 39375 |
+
{
|
| 39376 |
+
"epoch": 5.62302799619372,
|
| 39377 |
+
"grad_norm": 1.388026475906372,
|
| 39378 |
+
"learning_rate": 4.637480640680508e-06,
|
| 39379 |
+
"loss": 0.3392,
|
| 39380 |
+
"step": 56140
|
| 39381 |
+
},
|
| 39382 |
+
{
|
| 39383 |
+
"epoch": 5.624029648920719,
|
| 39384 |
+
"grad_norm": 2.1484484672546387,
|
| 39385 |
+
"learning_rate": 4.630953522338122e-06,
|
| 39386 |
+
"loss": 0.3976,
|
| 39387 |
+
"step": 56150
|
| 39388 |
+
},
|
| 39389 |
+
{
|
| 39390 |
+
"epoch": 5.625031301647718,
|
| 39391 |
+
"grad_norm": 2.381362199783325,
|
| 39392 |
+
"learning_rate": 4.624430531703405e-06,
|
| 39393 |
+
"loss": 0.4068,
|
| 39394 |
+
"step": 56160
|
| 39395 |
+
},
|
| 39396 |
+
{
|
| 39397 |
+
"epoch": 5.626032954374718,
|
| 39398 |
+
"grad_norm": 2.657985210418701,
|
| 39399 |
+
"learning_rate": 4.617911670098216e-06,
|
| 39400 |
+
"loss": 0.4177,
|
| 39401 |
+
"step": 56170
|
| 39402 |
+
},
|
| 39403 |
+
{
|
| 39404 |
+
"epoch": 5.627034607101718,
|
| 39405 |
+
"grad_norm": 1.6469602584838867,
|
| 39406 |
+
"learning_rate": 4.611396938843579e-06,
|
| 39407 |
+
"loss": 0.4121,
|
| 39408 |
+
"step": 56180
|
| 39409 |
+
},
|
| 39410 |
+
{
|
| 39411 |
+
"epoch": 5.6280362598287175,
|
| 39412 |
+
"grad_norm": 2.5772294998168945,
|
| 39413 |
+
"learning_rate": 4.6048863392596785e-06,
|
| 39414 |
+
"loss": 0.4089,
|
| 39415 |
+
"step": 56190
|
| 39416 |
+
},
|
| 39417 |
+
{
|
| 39418 |
+
"epoch": 5.629037912555717,
|
| 39419 |
+
"grad_norm": 1.9460467100143433,
|
| 39420 |
+
"learning_rate": 4.598379872665859e-06,
|
| 39421 |
+
"loss": 0.4098,
|
| 39422 |
+
"step": 56200
|
| 39423 |
+
},
|
| 39424 |
+
{
|
| 39425 |
+
"epoch": 5.630039565282717,
|
| 39426 |
+
"grad_norm": 2.7755281925201416,
|
| 39427 |
+
"learning_rate": 4.591877540380635e-06,
|
| 39428 |
+
"loss": 0.4216,
|
| 39429 |
+
"step": 56210
|
| 39430 |
+
},
|
| 39431 |
+
{
|
| 39432 |
+
"epoch": 5.631041218009716,
|
| 39433 |
+
"grad_norm": 2.41839337348938,
|
| 39434 |
+
"learning_rate": 4.585379343721674e-06,
|
| 39435 |
+
"loss": 0.4116,
|
| 39436 |
+
"step": 56220
|
| 39437 |
+
},
|
| 39438 |
+
{
|
| 39439 |
+
"epoch": 5.632042870736716,
|
| 39440 |
+
"grad_norm": 2.1798036098480225,
|
| 39441 |
+
"learning_rate": 4.578885284005816e-06,
|
| 39442 |
+
"loss": 0.3875,
|
| 39443 |
+
"step": 56230
|
| 39444 |
+
},
|
| 39445 |
+
{
|
| 39446 |
+
"epoch": 5.633044523463715,
|
| 39447 |
+
"grad_norm": 1.5918025970458984,
|
| 39448 |
+
"learning_rate": 4.5723953625490505e-06,
|
| 39449 |
+
"loss": 0.3922,
|
| 39450 |
+
"step": 56240
|
| 39451 |
+
},
|
| 39452 |
+
{
|
| 39453 |
+
"epoch": 5.634046176190715,
|
| 39454 |
+
"grad_norm": 2.385718584060669,
|
| 39455 |
+
"learning_rate": 4.565909580666539e-06,
|
| 39456 |
+
"loss": 0.4278,
|
| 39457 |
+
"step": 56250
|
| 39458 |
+
},
|
| 39459 |
+
{
|
| 39460 |
+
"epoch": 5.635047828917714,
|
| 39461 |
+
"grad_norm": 2.4254209995269775,
|
| 39462 |
+
"learning_rate": 4.559427939672595e-06,
|
| 39463 |
+
"loss": 0.4612,
|
| 39464 |
+
"step": 56260
|
| 39465 |
+
},
|
| 39466 |
+
{
|
| 39467 |
+
"epoch": 5.636049481644713,
|
| 39468 |
+
"grad_norm": 2.4982614517211914,
|
| 39469 |
+
"learning_rate": 4.552950440880704e-06,
|
| 39470 |
+
"loss": 0.4493,
|
| 39471 |
+
"step": 56270
|
| 39472 |
+
},
|
| 39473 |
+
{
|
| 39474 |
+
"epoch": 5.637051134371713,
|
| 39475 |
+
"grad_norm": 2.207980155944824,
|
| 39476 |
+
"learning_rate": 4.5464770856035036e-06,
|
| 39477 |
+
"loss": 0.3936,
|
| 39478 |
+
"step": 56280
|
| 39479 |
+
},
|
| 39480 |
+
{
|
| 39481 |
+
"epoch": 5.6380527870987125,
|
| 39482 |
+
"grad_norm": 2.4825491905212402,
|
| 39483 |
+
"learning_rate": 4.540007875152793e-06,
|
| 39484 |
+
"loss": 0.4166,
|
| 39485 |
+
"step": 56290
|
| 39486 |
+
},
|
| 39487 |
+
{
|
| 39488 |
+
"epoch": 5.6390544398257125,
|
| 39489 |
+
"grad_norm": 2.1901121139526367,
|
| 39490 |
+
"learning_rate": 4.533542810839531e-06,
|
| 39491 |
+
"loss": 0.3724,
|
| 39492 |
+
"step": 56300
|
| 39493 |
+
},
|
| 39494 |
+
{
|
| 39495 |
+
"epoch": 5.640056092552712,
|
| 39496 |
+
"grad_norm": 2.731470823287964,
|
| 39497 |
+
"learning_rate": 4.527081893973842e-06,
|
| 39498 |
+
"loss": 0.4289,
|
| 39499 |
+
"step": 56310
|
| 39500 |
+
},
|
| 39501 |
+
{
|
| 39502 |
+
"epoch": 5.641057745279712,
|
| 39503 |
+
"grad_norm": 2.079097032546997,
|
| 39504 |
+
"learning_rate": 4.520625125865003e-06,
|
| 39505 |
+
"loss": 0.3926,
|
| 39506 |
+
"step": 56320
|
| 39507 |
+
},
|
| 39508 |
+
{
|
| 39509 |
+
"epoch": 5.642059398006711,
|
| 39510 |
+
"grad_norm": 2.010434627532959,
|
| 39511 |
+
"learning_rate": 4.5141725078214486e-06,
|
| 39512 |
+
"loss": 0.4247,
|
| 39513 |
+
"step": 56330
|
| 39514 |
+
},
|
| 39515 |
+
{
|
| 39516 |
+
"epoch": 5.643061050733711,
|
| 39517 |
+
"grad_norm": 3.028982639312744,
|
| 39518 |
+
"learning_rate": 4.507724041150785e-06,
|
| 39519 |
+
"loss": 0.4586,
|
| 39520 |
+
"step": 56340
|
| 39521 |
+
},
|
| 39522 |
+
{
|
| 39523 |
+
"epoch": 5.64406270346071,
|
| 39524 |
+
"grad_norm": 2.4182560443878174,
|
| 39525 |
+
"learning_rate": 4.5012797271597625e-06,
|
| 39526 |
+
"loss": 0.4439,
|
| 39527 |
+
"step": 56350
|
| 39528 |
+
},
|
| 39529 |
+
{
|
| 39530 |
+
"epoch": 5.64506435618771,
|
| 39531 |
+
"grad_norm": 2.52976655960083,
|
| 39532 |
+
"learning_rate": 4.4948395671542995e-06,
|
| 39533 |
+
"loss": 0.5184,
|
| 39534 |
+
"step": 56360
|
| 39535 |
+
},
|
| 39536 |
+
{
|
| 39537 |
+
"epoch": 5.646066008914709,
|
| 39538 |
+
"grad_norm": 1.6455750465393066,
|
| 39539 |
+
"learning_rate": 4.488403562439466e-06,
|
| 39540 |
+
"loss": 0.4044,
|
| 39541 |
+
"step": 56370
|
| 39542 |
+
},
|
| 39543 |
+
{
|
| 39544 |
+
"epoch": 5.647067661641708,
|
| 39545 |
+
"grad_norm": 2.4018378257751465,
|
| 39546 |
+
"learning_rate": 4.481971714319497e-06,
|
| 39547 |
+
"loss": 0.3661,
|
| 39548 |
+
"step": 56380
|
| 39549 |
+
},
|
| 39550 |
+
{
|
| 39551 |
+
"epoch": 5.648069314368708,
|
| 39552 |
+
"grad_norm": 2.260538339614868,
|
| 39553 |
+
"learning_rate": 4.475544024097772e-06,
|
| 39554 |
+
"loss": 0.4011,
|
| 39555 |
+
"step": 56390
|
| 39556 |
+
},
|
| 39557 |
+
{
|
| 39558 |
+
"epoch": 5.6490709670957076,
|
| 39559 |
+
"grad_norm": 1.9844876527786255,
|
| 39560 |
+
"learning_rate": 4.469120493076856e-06,
|
| 39561 |
+
"loss": 0.3496,
|
| 39562 |
+
"step": 56400
|
| 39563 |
+
},
|
| 39564 |
+
{
|
| 39565 |
+
"epoch": 5.650072619822708,
|
| 39566 |
+
"grad_norm": 1.9300132989883423,
|
| 39567 |
+
"learning_rate": 4.462701122558438e-06,
|
| 39568 |
+
"loss": 0.4139,
|
| 39569 |
+
"step": 56410
|
| 39570 |
+
},
|
| 39571 |
+
{
|
| 39572 |
+
"epoch": 5.651074272549707,
|
| 39573 |
+
"grad_norm": 2.1562864780426025,
|
| 39574 |
+
"learning_rate": 4.456285913843381e-06,
|
| 39575 |
+
"loss": 0.4337,
|
| 39576 |
+
"step": 56420
|
| 39577 |
+
},
|
| 39578 |
+
{
|
| 39579 |
+
"epoch": 5.652075925276707,
|
| 39580 |
+
"grad_norm": 2.131909132003784,
|
| 39581 |
+
"learning_rate": 4.449874868231702e-06,
|
| 39582 |
+
"loss": 0.3717,
|
| 39583 |
+
"step": 56430
|
| 39584 |
+
},
|
| 39585 |
+
{
|
| 39586 |
+
"epoch": 5.653077578003706,
|
| 39587 |
+
"grad_norm": 2.2905733585357666,
|
| 39588 |
+
"learning_rate": 4.443467987022576e-06,
|
| 39589 |
+
"loss": 0.3997,
|
| 39590 |
+
"step": 56440
|
| 39591 |
+
},
|
| 39592 |
+
{
|
| 39593 |
+
"epoch": 5.654079230730706,
|
| 39594 |
+
"grad_norm": 1.802280068397522,
|
| 39595 |
+
"learning_rate": 4.437065271514332e-06,
|
| 39596 |
+
"loss": 0.4087,
|
| 39597 |
+
"step": 56450
|
| 39598 |
+
},
|
| 39599 |
+
{
|
| 39600 |
+
"epoch": 5.655080883457705,
|
| 39601 |
+
"grad_norm": 2.6998209953308105,
|
| 39602 |
+
"learning_rate": 4.4306667230044455e-06,
|
| 39603 |
+
"loss": 0.4461,
|
| 39604 |
+
"step": 56460
|
| 39605 |
+
},
|
| 39606 |
+
{
|
| 39607 |
+
"epoch": 5.656082536184705,
|
| 39608 |
+
"grad_norm": 2.262706756591797,
|
| 39609 |
+
"learning_rate": 4.424272342789581e-06,
|
| 39610 |
+
"loss": 0.4483,
|
| 39611 |
+
"step": 56470
|
| 39612 |
+
},
|
| 39613 |
+
{
|
| 39614 |
+
"epoch": 5.657084188911704,
|
| 39615 |
+
"grad_norm": 1.9895707368850708,
|
| 39616 |
+
"learning_rate": 4.4178821321655124e-06,
|
| 39617 |
+
"loss": 0.4614,
|
| 39618 |
+
"step": 56480
|
| 39619 |
+
},
|
| 39620 |
+
{
|
| 39621 |
+
"epoch": 5.658085841638703,
|
| 39622 |
+
"grad_norm": 1.780915379524231,
|
| 39623 |
+
"learning_rate": 4.4114960924272e-06,
|
| 39624 |
+
"loss": 0.4484,
|
| 39625 |
+
"step": 56490
|
| 39626 |
+
},
|
| 39627 |
+
{
|
| 39628 |
+
"epoch": 5.6590874943657035,
|
| 39629 |
+
"grad_norm": 3.3959758281707764,
|
| 39630 |
+
"learning_rate": 4.405114224868748e-06,
|
| 39631 |
+
"loss": 0.4559,
|
| 39632 |
+
"step": 56500
|
| 39633 |
+
},
|
| 39634 |
+
{
|
| 39635 |
+
"epoch": 5.660089147092703,
|
| 39636 |
+
"grad_norm": 1.9276880025863647,
|
| 39637 |
+
"learning_rate": 4.3987365307834175e-06,
|
| 39638 |
+
"loss": 0.4,
|
| 39639 |
+
"step": 56510
|
| 39640 |
+
},
|
| 39641 |
+
{
|
| 39642 |
+
"epoch": 5.661090799819703,
|
| 39643 |
+
"grad_norm": 2.4694437980651855,
|
| 39644 |
+
"learning_rate": 4.392363011463624e-06,
|
| 39645 |
+
"loss": 0.4421,
|
| 39646 |
+
"step": 56520
|
| 39647 |
+
},
|
| 39648 |
+
{
|
| 39649 |
+
"epoch": 5.662092452546702,
|
| 39650 |
+
"grad_norm": 2.1145718097686768,
|
| 39651 |
+
"learning_rate": 4.38599366820093e-06,
|
| 39652 |
+
"loss": 0.4019,
|
| 39653 |
+
"step": 56530
|
| 39654 |
+
},
|
| 39655 |
+
{
|
| 39656 |
+
"epoch": 5.663094105273702,
|
| 39657 |
+
"grad_norm": 2.955876350402832,
|
| 39658 |
+
"learning_rate": 4.379628502286073e-06,
|
| 39659 |
+
"loss": 0.4099,
|
| 39660 |
+
"step": 56540
|
| 39661 |
+
},
|
| 39662 |
+
{
|
| 39663 |
+
"epoch": 5.664095758000701,
|
| 39664 |
+
"grad_norm": 2.309077739715576,
|
| 39665 |
+
"learning_rate": 4.373267515008916e-06,
|
| 39666 |
+
"loss": 0.3277,
|
| 39667 |
+
"step": 56550
|
| 39668 |
+
},
|
| 39669 |
+
{
|
| 39670 |
+
"epoch": 5.665097410727701,
|
| 39671 |
+
"grad_norm": 2.708979368209839,
|
| 39672 |
+
"learning_rate": 4.366910707658492e-06,
|
| 39673 |
+
"loss": 0.4402,
|
| 39674 |
+
"step": 56560
|
| 39675 |
+
},
|
| 39676 |
+
{
|
| 39677 |
+
"epoch": 5.6660990634547,
|
| 39678 |
+
"grad_norm": 2.3290703296661377,
|
| 39679 |
+
"learning_rate": 4.360558081522975e-06,
|
| 39680 |
+
"loss": 0.403,
|
| 39681 |
+
"step": 56570
|
| 39682 |
+
},
|
| 39683 |
+
{
|
| 39684 |
+
"epoch": 5.6671007161817,
|
| 39685 |
+
"grad_norm": 1.8824762105941772,
|
| 39686 |
+
"learning_rate": 4.35420963788972e-06,
|
| 39687 |
+
"loss": 0.4044,
|
| 39688 |
+
"step": 56580
|
| 39689 |
+
},
|
| 39690 |
+
{
|
| 39691 |
+
"epoch": 5.668102368908699,
|
| 39692 |
+
"grad_norm": 2.1305978298187256,
|
| 39693 |
+
"learning_rate": 4.347865378045196e-06,
|
| 39694 |
+
"loss": 0.4064,
|
| 39695 |
+
"step": 56590
|
| 39696 |
+
},
|
| 39697 |
+
{
|
| 39698 |
+
"epoch": 5.6691040216356985,
|
| 39699 |
+
"grad_norm": 3.0818374156951904,
|
| 39700 |
+
"learning_rate": 4.341525303275043e-06,
|
| 39701 |
+
"loss": 0.3713,
|
| 39702 |
+
"step": 56600
|
| 39703 |
+
},
|
| 39704 |
+
{
|
| 39705 |
+
"epoch": 5.6701056743626985,
|
| 39706 |
+
"grad_norm": 2.7476348876953125,
|
| 39707 |
+
"learning_rate": 4.335189414864071e-06,
|
| 39708 |
+
"loss": 0.4019,
|
| 39709 |
+
"step": 56610
|
| 39710 |
+
},
|
| 39711 |
+
{
|
| 39712 |
+
"epoch": 5.671107327089698,
|
| 39713 |
+
"grad_norm": 2.616140604019165,
|
| 39714 |
+
"learning_rate": 4.328857714096204e-06,
|
| 39715 |
+
"loss": 0.4453,
|
| 39716 |
+
"step": 56620
|
| 39717 |
+
},
|
| 39718 |
+
{
|
| 39719 |
+
"epoch": 5.672108979816698,
|
| 39720 |
+
"grad_norm": 2.42598295211792,
|
| 39721 |
+
"learning_rate": 4.322530202254535e-06,
|
| 39722 |
+
"loss": 0.4153,
|
| 39723 |
+
"step": 56630
|
| 39724 |
+
},
|
| 39725 |
+
{
|
| 39726 |
+
"epoch": 5.673110632543697,
|
| 39727 |
+
"grad_norm": 2.014292001724243,
|
| 39728 |
+
"learning_rate": 4.316206880621324e-06,
|
| 39729 |
+
"loss": 0.3665,
|
| 39730 |
+
"step": 56640
|
| 39731 |
+
},
|
| 39732 |
+
{
|
| 39733 |
+
"epoch": 5.674112285270697,
|
| 39734 |
+
"grad_norm": 2.190354824066162,
|
| 39735 |
+
"learning_rate": 4.3098877504779644e-06,
|
| 39736 |
+
"loss": 0.4764,
|
| 39737 |
+
"step": 56650
|
| 39738 |
+
},
|
| 39739 |
+
{
|
| 39740 |
+
"epoch": 5.675113937997696,
|
| 39741 |
+
"grad_norm": 2.2174196243286133,
|
| 39742 |
+
"learning_rate": 4.303572813104995e-06,
|
| 39743 |
+
"loss": 0.401,
|
| 39744 |
+
"step": 56660
|
| 39745 |
+
},
|
| 39746 |
+
{
|
| 39747 |
+
"epoch": 5.676115590724696,
|
| 39748 |
+
"grad_norm": 2.3515331745147705,
|
| 39749 |
+
"learning_rate": 4.2972620697821094e-06,
|
| 39750 |
+
"loss": 0.3816,
|
| 39751 |
+
"step": 56670
|
| 39752 |
+
},
|
| 39753 |
+
{
|
| 39754 |
+
"epoch": 5.677117243451695,
|
| 39755 |
+
"grad_norm": 2.6349756717681885,
|
| 39756 |
+
"learning_rate": 4.29095552178817e-06,
|
| 39757 |
+
"loss": 0.4116,
|
| 39758 |
+
"step": 56680
|
| 39759 |
+
},
|
| 39760 |
+
{
|
| 39761 |
+
"epoch": 5.678118896178695,
|
| 39762 |
+
"grad_norm": 2.410928964614868,
|
| 39763 |
+
"learning_rate": 4.2846531704011734e-06,
|
| 39764 |
+
"loss": 0.3791,
|
| 39765 |
+
"step": 56690
|
| 39766 |
+
},
|
| 39767 |
+
{
|
| 39768 |
+
"epoch": 5.679120548905694,
|
| 39769 |
+
"grad_norm": 2.1149165630340576,
|
| 39770 |
+
"learning_rate": 4.278355016898247e-06,
|
| 39771 |
+
"loss": 0.4122,
|
| 39772 |
+
"step": 56700
|
| 39773 |
+
},
|
| 39774 |
+
{
|
| 39775 |
+
"epoch": 5.6801222016326935,
|
| 39776 |
+
"grad_norm": 2.3246634006500244,
|
| 39777 |
+
"learning_rate": 4.2720610625557075e-06,
|
| 39778 |
+
"loss": 0.4128,
|
| 39779 |
+
"step": 56710
|
| 39780 |
+
},
|
| 39781 |
+
{
|
| 39782 |
+
"epoch": 5.681123854359694,
|
| 39783 |
+
"grad_norm": 2.415755033493042,
|
| 39784 |
+
"learning_rate": 4.265771308649e-06,
|
| 39785 |
+
"loss": 0.3834,
|
| 39786 |
+
"step": 56720
|
| 39787 |
+
},
|
| 39788 |
+
{
|
| 39789 |
+
"epoch": 5.682125507086693,
|
| 39790 |
+
"grad_norm": 2.0188894271850586,
|
| 39791 |
+
"learning_rate": 4.259485756452703e-06,
|
| 39792 |
+
"loss": 0.3185,
|
| 39793 |
+
"step": 56730
|
| 39794 |
+
},
|
| 39795 |
+
{
|
| 39796 |
+
"epoch": 5.683127159813693,
|
| 39797 |
+
"grad_norm": 2.341989517211914,
|
| 39798 |
+
"learning_rate": 4.253204407240563e-06,
|
| 39799 |
+
"loss": 0.3937,
|
| 39800 |
+
"step": 56740
|
| 39801 |
+
},
|
| 39802 |
+
{
|
| 39803 |
+
"epoch": 5.684128812540692,
|
| 39804 |
+
"grad_norm": 1.688773274421692,
|
| 39805 |
+
"learning_rate": 4.24692726228548e-06,
|
| 39806 |
+
"loss": 0.4417,
|
| 39807 |
+
"step": 56750
|
| 39808 |
+
},
|
| 39809 |
+
{
|
| 39810 |
+
"epoch": 5.685130465267692,
|
| 39811 |
+
"grad_norm": 2.6181986331939697,
|
| 39812 |
+
"learning_rate": 4.2406543228594945e-06,
|
| 39813 |
+
"loss": 0.4274,
|
| 39814 |
+
"step": 56760
|
| 39815 |
+
},
|
| 39816 |
+
{
|
| 39817 |
+
"epoch": 5.686132117994691,
|
| 39818 |
+
"grad_norm": 2.2288084030151367,
|
| 39819 |
+
"learning_rate": 4.234385590233778e-06,
|
| 39820 |
+
"loss": 0.4571,
|
| 39821 |
+
"step": 56770
|
| 39822 |
+
},
|
| 39823 |
+
{
|
| 39824 |
+
"epoch": 5.687133770721691,
|
| 39825 |
+
"grad_norm": 2.591604471206665,
|
| 39826 |
+
"learning_rate": 4.228121065678675e-06,
|
| 39827 |
+
"loss": 0.3218,
|
| 39828 |
+
"step": 56780
|
| 39829 |
+
},
|
| 39830 |
+
{
|
| 39831 |
+
"epoch": 5.68813542344869,
|
| 39832 |
+
"grad_norm": 2.018723964691162,
|
| 39833 |
+
"learning_rate": 4.221860750463669e-06,
|
| 39834 |
+
"loss": 0.4008,
|
| 39835 |
+
"step": 56790
|
| 39836 |
+
},
|
| 39837 |
+
{
|
| 39838 |
+
"epoch": 5.68913707617569,
|
| 39839 |
+
"grad_norm": 2.4986484050750732,
|
| 39840 |
+
"learning_rate": 4.215604645857388e-06,
|
| 39841 |
+
"loss": 0.4571,
|
| 39842 |
+
"step": 56800
|
| 39843 |
+
},
|
| 39844 |
+
{
|
| 39845 |
+
"epoch": 5.6901387289026895,
|
| 39846 |
+
"grad_norm": 2.5996737480163574,
|
| 39847 |
+
"learning_rate": 4.2093527531275936e-06,
|
| 39848 |
+
"loss": 0.385,
|
| 39849 |
+
"step": 56810
|
| 39850 |
+
},
|
| 39851 |
+
{
|
| 39852 |
+
"epoch": 5.691140381629689,
|
| 39853 |
+
"grad_norm": 2.841071605682373,
|
| 39854 |
+
"learning_rate": 4.203105073541225e-06,
|
| 39855 |
+
"loss": 0.4209,
|
| 39856 |
+
"step": 56820
|
| 39857 |
+
},
|
| 39858 |
+
{
|
| 39859 |
+
"epoch": 5.692142034356689,
|
| 39860 |
+
"grad_norm": 2.406714916229248,
|
| 39861 |
+
"learning_rate": 4.196861608364347e-06,
|
| 39862 |
+
"loss": 0.4076,
|
| 39863 |
+
"step": 56830
|
| 39864 |
+
},
|
| 39865 |
+
{
|
| 39866 |
+
"epoch": 5.693143687083688,
|
| 39867 |
+
"grad_norm": 2.3181378841400146,
|
| 39868 |
+
"learning_rate": 4.190622358862156e-06,
|
| 39869 |
+
"loss": 0.4396,
|
| 39870 |
+
"step": 56840
|
| 39871 |
+
},
|
| 39872 |
+
{
|
| 39873 |
+
"epoch": 5.694145339810688,
|
| 39874 |
+
"grad_norm": 1.4654369354248047,
|
| 39875 |
+
"learning_rate": 4.184387326299033e-06,
|
| 39876 |
+
"loss": 0.4027,
|
| 39877 |
+
"step": 56850
|
| 39878 |
+
},
|
| 39879 |
+
{
|
| 39880 |
+
"epoch": 5.695146992537687,
|
| 39881 |
+
"grad_norm": 2.1411664485931396,
|
| 39882 |
+
"learning_rate": 4.178156511938475e-06,
|
| 39883 |
+
"loss": 0.3841,
|
| 39884 |
+
"step": 56860
|
| 39885 |
+
},
|
| 39886 |
+
{
|
| 39887 |
+
"epoch": 5.696148645264687,
|
| 39888 |
+
"grad_norm": 2.9601173400878906,
|
| 39889 |
+
"learning_rate": 4.171929917043138e-06,
|
| 39890 |
+
"loss": 0.3403,
|
| 39891 |
+
"step": 56870
|
| 39892 |
+
},
|
| 39893 |
+
{
|
| 39894 |
+
"epoch": 5.697150297991686,
|
| 39895 |
+
"grad_norm": 2.155308961868286,
|
| 39896 |
+
"learning_rate": 4.1657075428748e-06,
|
| 39897 |
+
"loss": 0.4152,
|
| 39898 |
+
"step": 56880
|
| 39899 |
+
},
|
| 39900 |
+
{
|
| 39901 |
+
"epoch": 5.698151950718686,
|
| 39902 |
+
"grad_norm": 2.5320613384246826,
|
| 39903 |
+
"learning_rate": 4.159489390694418e-06,
|
| 39904 |
+
"loss": 0.4124,
|
| 39905 |
+
"step": 56890
|
| 39906 |
+
},
|
| 39907 |
+
{
|
| 39908 |
+
"epoch": 5.699153603445685,
|
| 39909 |
+
"grad_norm": 2.0077009201049805,
|
| 39910 |
+
"learning_rate": 4.153275461762068e-06,
|
| 39911 |
+
"loss": 0.4227,
|
| 39912 |
+
"step": 56900
|
| 39913 |
+
},
|
| 39914 |
+
{
|
| 39915 |
+
"epoch": 5.700155256172685,
|
| 39916 |
+
"grad_norm": 3.2072668075561523,
|
| 39917 |
+
"learning_rate": 4.147065757336982e-06,
|
| 39918 |
+
"loss": 0.4399,
|
| 39919 |
+
"step": 56910
|
| 39920 |
+
},
|
| 39921 |
+
{
|
| 39922 |
+
"epoch": 5.7011569088996845,
|
| 39923 |
+
"grad_norm": 2.669921398162842,
|
| 39924 |
+
"learning_rate": 4.14086027867753e-06,
|
| 39925 |
+
"loss": 0.3902,
|
| 39926 |
+
"step": 56920
|
| 39927 |
+
},
|
| 39928 |
+
{
|
| 39929 |
+
"epoch": 5.702158561626684,
|
| 39930 |
+
"grad_norm": 2.002126932144165,
|
| 39931 |
+
"learning_rate": 4.1346590270412294e-06,
|
| 39932 |
+
"loss": 0.4153,
|
| 39933 |
+
"step": 56930
|
| 39934 |
+
},
|
| 39935 |
+
{
|
| 39936 |
+
"epoch": 5.703160214353684,
|
| 39937 |
+
"grad_norm": 2.2236478328704834,
|
| 39938 |
+
"learning_rate": 4.128462003684746e-06,
|
| 39939 |
+
"loss": 0.3807,
|
| 39940 |
+
"step": 56940
|
| 39941 |
+
},
|
| 39942 |
+
{
|
| 39943 |
+
"epoch": 5.704161867080683,
|
| 39944 |
+
"grad_norm": 2.695922374725342,
|
| 39945 |
+
"learning_rate": 4.122269209863863e-06,
|
| 39946 |
+
"loss": 0.3892,
|
| 39947 |
+
"step": 56950
|
| 39948 |
+
},
|
| 39949 |
+
{
|
| 39950 |
+
"epoch": 5.705163519807683,
|
| 39951 |
+
"grad_norm": 1.9009491205215454,
|
| 39952 |
+
"learning_rate": 4.116080646833545e-06,
|
| 39953 |
+
"loss": 0.4313,
|
| 39954 |
+
"step": 56960
|
| 39955 |
+
},
|
| 39956 |
+
{
|
| 39957 |
+
"epoch": 5.706165172534682,
|
| 39958 |
+
"grad_norm": 2.272822618484497,
|
| 39959 |
+
"learning_rate": 4.109896315847872e-06,
|
| 39960 |
+
"loss": 0.4232,
|
| 39961 |
+
"step": 56970
|
| 39962 |
+
},
|
| 39963 |
+
{
|
| 39964 |
+
"epoch": 5.707166825261682,
|
| 39965 |
+
"grad_norm": 2.110412120819092,
|
| 39966 |
+
"learning_rate": 4.103716218160078e-06,
|
| 39967 |
+
"loss": 0.4574,
|
| 39968 |
+
"step": 56980
|
| 39969 |
+
},
|
| 39970 |
+
{
|
| 39971 |
+
"epoch": 5.708168477988681,
|
| 39972 |
+
"grad_norm": 2.2217419147491455,
|
| 39973 |
+
"learning_rate": 4.097540355022533e-06,
|
| 39974 |
+
"loss": 0.4325,
|
| 39975 |
+
"step": 56990
|
| 39976 |
+
},
|
| 39977 |
+
{
|
| 39978 |
+
"epoch": 5.709170130715681,
|
| 39979 |
+
"grad_norm": 2.248148202896118,
|
| 39980 |
+
"learning_rate": 4.091368727686751e-06,
|
| 39981 |
+
"loss": 0.3897,
|
| 39982 |
+
"step": 57000
|
| 39983 |
+
},
|
| 39984 |
+
{
|
| 39985 |
+
"epoch": 5.71017178344268,
|
| 39986 |
+
"grad_norm": 2.181851387023926,
|
| 39987 |
+
"learning_rate": 4.085201337403391e-06,
|
| 39988 |
+
"loss": 0.3745,
|
| 39989 |
+
"step": 57010
|
| 39990 |
+
},
|
| 39991 |
+
{
|
| 39992 |
+
"epoch": 5.71117343616968,
|
| 39993 |
+
"grad_norm": 2.311033248901367,
|
| 39994 |
+
"learning_rate": 4.079038185422249e-06,
|
| 39995 |
+
"loss": 0.4625,
|
| 39996 |
+
"step": 57020
|
| 39997 |
+
},
|
| 39998 |
+
{
|
| 39999 |
+
"epoch": 5.71217508889668,
|
| 40000 |
+
"grad_norm": 2.210078477859497,
|
| 40001 |
+
"learning_rate": 4.072879272992264e-06,
|
| 40002 |
+
"loss": 0.4895,
|
| 40003 |
+
"step": 57030
|
| 40004 |
+
},
|
| 40005 |
+
{
|
| 40006 |
+
"epoch": 5.713176741623679,
|
| 40007 |
+
"grad_norm": 1.854807734489441,
|
| 40008 |
+
"learning_rate": 4.066724601361513e-06,
|
| 40009 |
+
"loss": 0.3613,
|
| 40010 |
+
"step": 57040
|
| 40011 |
+
},
|
| 40012 |
+
{
|
| 40013 |
+
"epoch": 5.714178394350679,
|
| 40014 |
+
"grad_norm": 2.825054168701172,
|
| 40015 |
+
"learning_rate": 4.060574171777218e-06,
|
| 40016 |
+
"loss": 0.4114,
|
| 40017 |
+
"step": 57050
|
| 40018 |
+
},
|
| 40019 |
+
{
|
| 40020 |
+
"epoch": 5.715180047077678,
|
| 40021 |
+
"grad_norm": 1.955674648284912,
|
| 40022 |
+
"learning_rate": 4.054427985485743e-06,
|
| 40023 |
+
"loss": 0.3402,
|
| 40024 |
+
"step": 57060
|
| 40025 |
+
},
|
| 40026 |
+
{
|
| 40027 |
+
"epoch": 5.716181699804678,
|
| 40028 |
+
"grad_norm": 2.44513201713562,
|
| 40029 |
+
"learning_rate": 4.048286043732583e-06,
|
| 40030 |
+
"loss": 0.4334,
|
| 40031 |
+
"step": 57070
|
| 40032 |
+
},
|
| 40033 |
+
{
|
| 40034 |
+
"epoch": 5.717183352531677,
|
| 40035 |
+
"grad_norm": 2.0144870281219482,
|
| 40036 |
+
"learning_rate": 4.042148347762381e-06,
|
| 40037 |
+
"loss": 0.4543,
|
| 40038 |
+
"step": 57080
|
| 40039 |
+
},
|
| 40040 |
+
{
|
| 40041 |
+
"epoch": 5.718185005258677,
|
| 40042 |
+
"grad_norm": 1.8024274110794067,
|
| 40043 |
+
"learning_rate": 4.036014898818919e-06,
|
| 40044 |
+
"loss": 0.3996,
|
| 40045 |
+
"step": 57090
|
| 40046 |
+
},
|
| 40047 |
+
{
|
| 40048 |
+
"epoch": 5.719186657985676,
|
| 40049 |
+
"grad_norm": 2.058702230453491,
|
| 40050 |
+
"learning_rate": 4.029885698145111e-06,
|
| 40051 |
+
"loss": 0.4036,
|
| 40052 |
+
"step": 57100
|
| 40053 |
+
},
|
| 40054 |
+
{
|
| 40055 |
+
"epoch": 5.720188310712675,
|
| 40056 |
+
"grad_norm": 2.6458261013031006,
|
| 40057 |
+
"learning_rate": 4.023760746983022e-06,
|
| 40058 |
+
"loss": 0.4333,
|
| 40059 |
+
"step": 57110
|
| 40060 |
+
},
|
| 40061 |
+
{
|
| 40062 |
+
"epoch": 5.7211899634396755,
|
| 40063 |
+
"grad_norm": 2.839874744415283,
|
| 40064 |
+
"learning_rate": 4.017640046573845e-06,
|
| 40065 |
+
"loss": 0.3451,
|
| 40066 |
+
"step": 57120
|
| 40067 |
+
},
|
| 40068 |
+
{
|
| 40069 |
+
"epoch": 5.7221916161666755,
|
| 40070 |
+
"grad_norm": 2.2048985958099365,
|
| 40071 |
+
"learning_rate": 4.011523598157918e-06,
|
| 40072 |
+
"loss": 0.3909,
|
| 40073 |
+
"step": 57130
|
| 40074 |
+
},
|
| 40075 |
+
{
|
| 40076 |
+
"epoch": 5.723193268893675,
|
| 40077 |
+
"grad_norm": 2.1822943687438965,
|
| 40078 |
+
"learning_rate": 4.005411402974715e-06,
|
| 40079 |
+
"loss": 0.3479,
|
| 40080 |
+
"step": 57140
|
| 40081 |
+
},
|
| 40082 |
+
{
|
| 40083 |
+
"epoch": 5.724194921620674,
|
| 40084 |
+
"grad_norm": 2.527682304382324,
|
| 40085 |
+
"learning_rate": 3.999303462262849e-06,
|
| 40086 |
+
"loss": 0.4637,
|
| 40087 |
+
"step": 57150
|
| 40088 |
+
},
|
| 40089 |
+
{
|
| 40090 |
+
"epoch": 5.725196574347674,
|
| 40091 |
+
"grad_norm": 1.8430290222167969,
|
| 40092 |
+
"learning_rate": 3.993199777260068e-06,
|
| 40093 |
+
"loss": 0.5038,
|
| 40094 |
+
"step": 57160
|
| 40095 |
+
},
|
| 40096 |
+
{
|
| 40097 |
+
"epoch": 5.726198227074673,
|
| 40098 |
+
"grad_norm": 1.6539762020111084,
|
| 40099 |
+
"learning_rate": 3.987100349203262e-06,
|
| 40100 |
+
"loss": 0.3546,
|
| 40101 |
+
"step": 57170
|
| 40102 |
+
},
|
| 40103 |
+
{
|
| 40104 |
+
"epoch": 5.727199879801673,
|
| 40105 |
+
"grad_norm": 2.591881275177002,
|
| 40106 |
+
"learning_rate": 3.9810051793284564e-06,
|
| 40107 |
+
"loss": 0.427,
|
| 40108 |
+
"step": 57180
|
| 40109 |
+
},
|
| 40110 |
+
{
|
| 40111 |
+
"epoch": 5.728201532528672,
|
| 40112 |
+
"grad_norm": 1.6094529628753662,
|
| 40113 |
+
"learning_rate": 3.974914268870814e-06,
|
| 40114 |
+
"loss": 0.3898,
|
| 40115 |
+
"step": 57190
|
| 40116 |
+
},
|
| 40117 |
+
{
|
| 40118 |
+
"epoch": 5.729203185255672,
|
| 40119 |
+
"grad_norm": 2.5625104904174805,
|
| 40120 |
+
"learning_rate": 3.96882761906463e-06,
|
| 40121 |
+
"loss": 0.3754,
|
| 40122 |
+
"step": 57200
|
| 40123 |
+
},
|
| 40124 |
+
{
|
| 40125 |
+
"epoch": 5.730204837982671,
|
| 40126 |
+
"grad_norm": 2.2329931259155273,
|
| 40127 |
+
"learning_rate": 3.962745231143347e-06,
|
| 40128 |
+
"loss": 0.3621,
|
| 40129 |
+
"step": 57210
|
| 40130 |
+
},
|
| 40131 |
+
{
|
| 40132 |
+
"epoch": 5.7312064907096705,
|
| 40133 |
+
"grad_norm": 2.146312952041626,
|
| 40134 |
+
"learning_rate": 3.9566671063395286e-06,
|
| 40135 |
+
"loss": 0.4004,
|
| 40136 |
+
"step": 57220
|
| 40137 |
+
},
|
| 40138 |
+
{
|
| 40139 |
+
"epoch": 5.7322081434366705,
|
| 40140 |
+
"grad_norm": 2.2972376346588135,
|
| 40141 |
+
"learning_rate": 3.950593245884893e-06,
|
| 40142 |
+
"loss": 0.4075,
|
| 40143 |
+
"step": 57230
|
| 40144 |
+
},
|
| 40145 |
+
{
|
| 40146 |
+
"epoch": 5.7332097961636705,
|
| 40147 |
+
"grad_norm": 2.4375569820404053,
|
| 40148 |
+
"learning_rate": 3.944523651010276e-06,
|
| 40149 |
+
"loss": 0.4456,
|
| 40150 |
+
"step": 57240
|
| 40151 |
+
},
|
| 40152 |
+
{
|
| 40153 |
+
"epoch": 5.73421144889067,
|
| 40154 |
+
"grad_norm": 2.322031021118164,
|
| 40155 |
+
"learning_rate": 3.93845832294566e-06,
|
| 40156 |
+
"loss": 0.397,
|
| 40157 |
+
"step": 57250
|
| 40158 |
+
},
|
| 40159 |
+
{
|
| 40160 |
+
"epoch": 5.735213101617669,
|
| 40161 |
+
"grad_norm": 3.1206319332122803,
|
| 40162 |
+
"learning_rate": 3.932397262920162e-06,
|
| 40163 |
+
"loss": 0.3815,
|
| 40164 |
+
"step": 57260
|
| 40165 |
+
},
|
| 40166 |
+
{
|
| 40167 |
+
"epoch": 5.736214754344669,
|
| 40168 |
+
"grad_norm": 2.2064762115478516,
|
| 40169 |
+
"learning_rate": 3.926340472162029e-06,
|
| 40170 |
+
"loss": 0.3571,
|
| 40171 |
+
"step": 57270
|
| 40172 |
+
},
|
| 40173 |
+
{
|
| 40174 |
+
"epoch": 5.737216407071668,
|
| 40175 |
+
"grad_norm": 2.8701906204223633,
|
| 40176 |
+
"learning_rate": 3.920287951898649e-06,
|
| 40177 |
+
"loss": 0.4011,
|
| 40178 |
+
"step": 57280
|
| 40179 |
+
},
|
| 40180 |
+
{
|
| 40181 |
+
"epoch": 5.738218059798668,
|
| 40182 |
+
"grad_norm": 2.4009292125701904,
|
| 40183 |
+
"learning_rate": 3.914239703356537e-06,
|
| 40184 |
+
"loss": 0.4515,
|
| 40185 |
+
"step": 57290
|
| 40186 |
+
},
|
| 40187 |
+
{
|
| 40188 |
+
"epoch": 5.739219712525667,
|
| 40189 |
+
"grad_norm": 2.3145830631256104,
|
| 40190 |
+
"learning_rate": 3.908195727761354e-06,
|
| 40191 |
+
"loss": 0.4274,
|
| 40192 |
+
"step": 57300
|
| 40193 |
+
},
|
| 40194 |
+
{
|
| 40195 |
+
"epoch": 5.740221365252667,
|
| 40196 |
+
"grad_norm": 1.9337557554244995,
|
| 40197 |
+
"learning_rate": 3.902156026337881e-06,
|
| 40198 |
+
"loss": 0.4392,
|
| 40199 |
+
"step": 57310
|
| 40200 |
+
},
|
| 40201 |
+
{
|
| 40202 |
+
"epoch": 5.741223017979666,
|
| 40203 |
+
"grad_norm": 1.9809796810150146,
|
| 40204 |
+
"learning_rate": 3.896120600310044e-06,
|
| 40205 |
+
"loss": 0.4294,
|
| 40206 |
+
"step": 57320
|
| 40207 |
+
},
|
| 40208 |
+
{
|
| 40209 |
+
"epoch": 5.7422246707066655,
|
| 40210 |
+
"grad_norm": 1.9879062175750732,
|
| 40211 |
+
"learning_rate": 3.8900894509008965e-06,
|
| 40212 |
+
"loss": 0.3999,
|
| 40213 |
+
"step": 57330
|
| 40214 |
+
},
|
| 40215 |
+
{
|
| 40216 |
+
"epoch": 5.743226323433666,
|
| 40217 |
+
"grad_norm": 2.3233344554901123,
|
| 40218 |
+
"learning_rate": 3.884062579332631e-06,
|
| 40219 |
+
"loss": 0.3766,
|
| 40220 |
+
"step": 57340
|
| 40221 |
+
},
|
| 40222 |
+
{
|
| 40223 |
+
"epoch": 5.744227976160665,
|
| 40224 |
+
"grad_norm": 2.4049007892608643,
|
| 40225 |
+
"learning_rate": 3.878039986826565e-06,
|
| 40226 |
+
"loss": 0.3805,
|
| 40227 |
+
"step": 57350
|
| 40228 |
+
},
|
| 40229 |
+
{
|
| 40230 |
+
"epoch": 5.745229628887665,
|
| 40231 |
+
"grad_norm": 1.9182034730911255,
|
| 40232 |
+
"learning_rate": 3.872021674603149e-06,
|
| 40233 |
+
"loss": 0.4208,
|
| 40234 |
+
"step": 57360
|
| 40235 |
+
},
|
| 40236 |
+
{
|
| 40237 |
+
"epoch": 5.746231281614664,
|
| 40238 |
+
"grad_norm": 1.813327431678772,
|
| 40239 |
+
"learning_rate": 3.866007643881989e-06,
|
| 40240 |
+
"loss": 0.3607,
|
| 40241 |
+
"step": 57370
|
| 40242 |
+
},
|
| 40243 |
+
{
|
| 40244 |
+
"epoch": 5.747232934341664,
|
| 40245 |
+
"grad_norm": 2.2906525135040283,
|
| 40246 |
+
"learning_rate": 3.859997895881784e-06,
|
| 40247 |
+
"loss": 0.4138,
|
| 40248 |
+
"step": 57380
|
| 40249 |
+
},
|
| 40250 |
+
{
|
| 40251 |
+
"epoch": 5.748234587068663,
|
| 40252 |
+
"grad_norm": 2.034022808074951,
|
| 40253 |
+
"learning_rate": 3.853992431820397e-06,
|
| 40254 |
+
"loss": 0.4407,
|
| 40255 |
+
"step": 57390
|
| 40256 |
+
},
|
| 40257 |
+
{
|
| 40258 |
+
"epoch": 5.749236239795663,
|
| 40259 |
+
"grad_norm": 1.5390911102294922,
|
| 40260 |
+
"learning_rate": 3.847991252914801e-06,
|
| 40261 |
+
"loss": 0.3742,
|
| 40262 |
+
"step": 57400
|
| 40263 |
+
},
|
| 40264 |
+
{
|
| 40265 |
+
"epoch": 5.750237892522662,
|
| 40266 |
+
"grad_norm": 2.2236108779907227,
|
| 40267 |
+
"learning_rate": 3.841994360381135e-06,
|
| 40268 |
+
"loss": 0.4096,
|
| 40269 |
+
"step": 57410
|
| 40270 |
+
},
|
| 40271 |
+
{
|
| 40272 |
+
"epoch": 5.751239545249662,
|
| 40273 |
+
"grad_norm": 1.9090604782104492,
|
| 40274 |
+
"learning_rate": 3.836001755434621e-06,
|
| 40275 |
+
"loss": 0.4472,
|
| 40276 |
+
"step": 57420
|
| 40277 |
+
},
|
| 40278 |
+
{
|
| 40279 |
+
"epoch": 5.7522411979766614,
|
| 40280 |
+
"grad_norm": 2.017423152923584,
|
| 40281 |
+
"learning_rate": 3.830013439289643e-06,
|
| 40282 |
+
"loss": 0.3648,
|
| 40283 |
+
"step": 57430
|
| 40284 |
+
},
|
| 40285 |
+
{
|
| 40286 |
+
"epoch": 5.753242850703661,
|
| 40287 |
+
"grad_norm": 2.2559385299682617,
|
| 40288 |
+
"learning_rate": 3.824029413159722e-06,
|
| 40289 |
+
"loss": 0.4173,
|
| 40290 |
+
"step": 57440
|
| 40291 |
+
},
|
| 40292 |
+
{
|
| 40293 |
+
"epoch": 5.754244503430661,
|
| 40294 |
+
"grad_norm": 2.4844183921813965,
|
| 40295 |
+
"learning_rate": 3.818049678257485e-06,
|
| 40296 |
+
"loss": 0.4444,
|
| 40297 |
+
"step": 57450
|
| 40298 |
+
},
|
| 40299 |
+
{
|
| 40300 |
+
"epoch": 5.75524615615766,
|
| 40301 |
+
"grad_norm": 2.446603298187256,
|
| 40302 |
+
"learning_rate": 3.8120742357947047e-06,
|
| 40303 |
+
"loss": 0.4109,
|
| 40304 |
+
"step": 57460
|
| 40305 |
+
},
|
| 40306 |
+
{
|
| 40307 |
+
"epoch": 5.75624780888466,
|
| 40308 |
+
"grad_norm": 1.6203423738479614,
|
| 40309 |
+
"learning_rate": 3.806103086982277e-06,
|
| 40310 |
+
"loss": 0.3617,
|
| 40311 |
+
"step": 57470
|
| 40312 |
+
},
|
| 40313 |
+
{
|
| 40314 |
+
"epoch": 5.757249461611659,
|
| 40315 |
+
"grad_norm": 1.9405463933944702,
|
| 40316 |
+
"learning_rate": 3.800136233030249e-06,
|
| 40317 |
+
"loss": 0.3936,
|
| 40318 |
+
"step": 57480
|
| 40319 |
+
},
|
| 40320 |
+
{
|
| 40321 |
+
"epoch": 5.758251114338659,
|
| 40322 |
+
"grad_norm": 2.4396026134490967,
|
| 40323 |
+
"learning_rate": 3.7941736751477636e-06,
|
| 40324 |
+
"loss": 0.3951,
|
| 40325 |
+
"step": 57490
|
| 40326 |
+
},
|
| 40327 |
+
{
|
| 40328 |
+
"epoch": 5.759252767065658,
|
| 40329 |
+
"grad_norm": 1.5259758234024048,
|
| 40330 |
+
"learning_rate": 3.788215414543106e-06,
|
| 40331 |
+
"loss": 0.4551,
|
| 40332 |
+
"step": 57500
|
| 40333 |
+
},
|
| 40334 |
+
{
|
| 40335 |
+
"epoch": 5.760254419792658,
|
| 40336 |
+
"grad_norm": 2.282928943634033,
|
| 40337 |
+
"learning_rate": 3.7822614524237106e-06,
|
| 40338 |
+
"loss": 0.4045,
|
| 40339 |
+
"step": 57510
|
| 40340 |
+
},
|
| 40341 |
+
{
|
| 40342 |
+
"epoch": 5.761256072519657,
|
| 40343 |
+
"grad_norm": 2.7155280113220215,
|
| 40344 |
+
"learning_rate": 3.776311789996123e-06,
|
| 40345 |
+
"loss": 0.413,
|
| 40346 |
+
"step": 57520
|
| 40347 |
+
},
|
| 40348 |
+
{
|
| 40349 |
+
"epoch": 5.762257725246657,
|
| 40350 |
+
"grad_norm": 1.9408833980560303,
|
| 40351 |
+
"learning_rate": 3.7703664284660046e-06,
|
| 40352 |
+
"loss": 0.3975,
|
| 40353 |
+
"step": 57530
|
| 40354 |
+
},
|
| 40355 |
+
{
|
| 40356 |
+
"epoch": 5.7632593779736565,
|
| 40357 |
+
"grad_norm": 2.8400866985321045,
|
| 40358 |
+
"learning_rate": 3.764425369038166e-06,
|
| 40359 |
+
"loss": 0.3971,
|
| 40360 |
+
"step": 57540
|
| 40361 |
+
},
|
| 40362 |
+
{
|
| 40363 |
+
"epoch": 5.764261030700656,
|
| 40364 |
+
"grad_norm": 2.386186361312866,
|
| 40365 |
+
"learning_rate": 3.7584886129165497e-06,
|
| 40366 |
+
"loss": 0.3978,
|
| 40367 |
+
"step": 57550
|
| 40368 |
+
},
|
| 40369 |
+
{
|
| 40370 |
+
"epoch": 5.765262683427656,
|
| 40371 |
+
"grad_norm": 2.210965394973755,
|
| 40372 |
+
"learning_rate": 3.7525561613042047e-06,
|
| 40373 |
+
"loss": 0.4675,
|
| 40374 |
+
"step": 57560
|
| 40375 |
+
},
|
| 40376 |
+
{
|
| 40377 |
+
"epoch": 5.766264336154655,
|
| 40378 |
+
"grad_norm": 2.6471710205078125,
|
| 40379 |
+
"learning_rate": 3.7466280154033168e-06,
|
| 40380 |
+
"loss": 0.3918,
|
| 40381 |
+
"step": 57570
|
| 40382 |
+
},
|
| 40383 |
+
{
|
| 40384 |
+
"epoch": 5.767265988881655,
|
| 40385 |
+
"grad_norm": 2.123166561126709,
|
| 40386 |
+
"learning_rate": 3.7407041764152134e-06,
|
| 40387 |
+
"loss": 0.4137,
|
| 40388 |
+
"step": 57580
|
| 40389 |
+
},
|
| 40390 |
+
{
|
| 40391 |
+
"epoch": 5.768267641608654,
|
| 40392 |
+
"grad_norm": 2.8771538734436035,
|
| 40393 |
+
"learning_rate": 3.7347846455403375e-06,
|
| 40394 |
+
"loss": 0.398,
|
| 40395 |
+
"step": 57590
|
| 40396 |
+
},
|
| 40397 |
+
{
|
| 40398 |
+
"epoch": 5.769269294335654,
|
| 40399 |
+
"grad_norm": 2.2912991046905518,
|
| 40400 |
+
"learning_rate": 3.7288694239782457e-06,
|
| 40401 |
+
"loss": 0.3896,
|
| 40402 |
+
"step": 57600
|
| 40403 |
+
},
|
| 40404 |
+
{
|
| 40405 |
+
"epoch": 5.770270947062653,
|
| 40406 |
+
"grad_norm": 2.3267202377319336,
|
| 40407 |
+
"learning_rate": 3.722958512927635e-06,
|
| 40408 |
+
"loss": 0.3696,
|
| 40409 |
+
"step": 57610
|
| 40410 |
+
},
|
| 40411 |
+
{
|
| 40412 |
+
"epoch": 5.771272599789653,
|
| 40413 |
+
"grad_norm": 2.1532654762268066,
|
| 40414 |
+
"learning_rate": 3.717051913586342e-06,
|
| 40415 |
+
"loss": 0.3868,
|
| 40416 |
+
"step": 57620
|
| 40417 |
+
},
|
| 40418 |
+
{
|
| 40419 |
+
"epoch": 5.772274252516652,
|
| 40420 |
+
"grad_norm": 2.059478998184204,
|
| 40421 |
+
"learning_rate": 3.711149627151314e-06,
|
| 40422 |
+
"loss": 0.4399,
|
| 40423 |
+
"step": 57630
|
| 40424 |
+
},
|
| 40425 |
+
{
|
| 40426 |
+
"epoch": 5.773275905243652,
|
| 40427 |
+
"grad_norm": 2.340069532394409,
|
| 40428 |
+
"learning_rate": 3.7052516548186117e-06,
|
| 40429 |
+
"loss": 0.5034,
|
| 40430 |
+
"step": 57640
|
| 40431 |
+
},
|
| 40432 |
+
{
|
| 40433 |
+
"epoch": 5.774277557970652,
|
| 40434 |
+
"grad_norm": 2.296865940093994,
|
| 40435 |
+
"learning_rate": 3.699357997783451e-06,
|
| 40436 |
+
"loss": 0.3821,
|
| 40437 |
+
"step": 57650
|
| 40438 |
+
},
|
| 40439 |
+
{
|
| 40440 |
+
"epoch": 5.775279210697651,
|
| 40441 |
+
"grad_norm": 2.828794002532959,
|
| 40442 |
+
"learning_rate": 3.6934686572401567e-06,
|
| 40443 |
+
"loss": 0.4119,
|
| 40444 |
+
"step": 57660
|
| 40445 |
+
},
|
| 40446 |
+
{
|
| 40447 |
+
"epoch": 5.776280863424651,
|
| 40448 |
+
"grad_norm": 2.2535436153411865,
|
| 40449 |
+
"learning_rate": 3.6875836343821747e-06,
|
| 40450 |
+
"loss": 0.4178,
|
| 40451 |
+
"step": 57670
|
| 40452 |
+
},
|
| 40453 |
+
{
|
| 40454 |
+
"epoch": 5.77728251615165,
|
| 40455 |
+
"grad_norm": 2.087688684463501,
|
| 40456 |
+
"learning_rate": 3.6817029304020756e-06,
|
| 40457 |
+
"loss": 0.3934,
|
| 40458 |
+
"step": 57680
|
| 40459 |
+
},
|
| 40460 |
+
{
|
| 40461 |
+
"epoch": 5.77828416887865,
|
| 40462 |
+
"grad_norm": 1.857803225517273,
|
| 40463 |
+
"learning_rate": 3.6758265464915785e-06,
|
| 40464 |
+
"loss": 0.4118,
|
| 40465 |
+
"step": 57690
|
| 40466 |
+
},
|
| 40467 |
+
{
|
| 40468 |
+
"epoch": 5.779285821605649,
|
| 40469 |
+
"grad_norm": 1.995293140411377,
|
| 40470 |
+
"learning_rate": 3.6699544838415033e-06,
|
| 40471 |
+
"loss": 0.3811,
|
| 40472 |
+
"step": 57700
|
| 40473 |
+
},
|
| 40474 |
+
{
|
| 40475 |
+
"epoch": 5.780287474332649,
|
| 40476 |
+
"grad_norm": 2.203603982925415,
|
| 40477 |
+
"learning_rate": 3.6640867436417897e-06,
|
| 40478 |
+
"loss": 0.4568,
|
| 40479 |
+
"step": 57710
|
| 40480 |
+
},
|
| 40481 |
+
{
|
| 40482 |
+
"epoch": 5.781289127059648,
|
| 40483 |
+
"grad_norm": 2.020482301712036,
|
| 40484 |
+
"learning_rate": 3.6582233270815252e-06,
|
| 40485 |
+
"loss": 0.3451,
|
| 40486 |
+
"step": 57720
|
| 40487 |
+
},
|
| 40488 |
+
{
|
| 40489 |
+
"epoch": 5.782290779786648,
|
| 40490 |
+
"grad_norm": 2.640033721923828,
|
| 40491 |
+
"learning_rate": 3.6523642353489097e-06,
|
| 40492 |
+
"loss": 0.4285,
|
| 40493 |
+
"step": 57730
|
| 40494 |
+
},
|
| 40495 |
+
{
|
| 40496 |
+
"epoch": 5.783292432513647,
|
| 40497 |
+
"grad_norm": 2.9443697929382324,
|
| 40498 |
+
"learning_rate": 3.646509469631254e-06,
|
| 40499 |
+
"loss": 0.4025,
|
| 40500 |
+
"step": 57740
|
| 40501 |
+
},
|
| 40502 |
+
{
|
| 40503 |
+
"epoch": 5.7842940852406475,
|
| 40504 |
+
"grad_norm": 1.8863335847854614,
|
| 40505 |
+
"learning_rate": 3.640659031115001e-06,
|
| 40506 |
+
"loss": 0.4332,
|
| 40507 |
+
"step": 57750
|
| 40508 |
+
},
|
| 40509 |
+
{
|
| 40510 |
+
"epoch": 5.785295737967647,
|
| 40511 |
+
"grad_norm": 2.114644765853882,
|
| 40512 |
+
"learning_rate": 3.634812920985736e-06,
|
| 40513 |
+
"loss": 0.4284,
|
| 40514 |
+
"step": 57760
|
| 40515 |
+
},
|
| 40516 |
+
{
|
| 40517 |
+
"epoch": 5.786297390694646,
|
| 40518 |
+
"grad_norm": 2.059697151184082,
|
| 40519 |
+
"learning_rate": 3.6289711404281445e-06,
|
| 40520 |
+
"loss": 0.3775,
|
| 40521 |
+
"step": 57770
|
| 40522 |
+
},
|
| 40523 |
+
{
|
| 40524 |
+
"epoch": 5.787299043421646,
|
| 40525 |
+
"grad_norm": 1.0591448545455933,
|
| 40526 |
+
"learning_rate": 3.6231336906260265e-06,
|
| 40527 |
+
"loss": 0.3965,
|
| 40528 |
+
"step": 57780
|
| 40529 |
+
},
|
| 40530 |
+
{
|
| 40531 |
+
"epoch": 5.788300696148645,
|
| 40532 |
+
"grad_norm": 2.3515522480010986,
|
| 40533 |
+
"learning_rate": 3.617300572762339e-06,
|
| 40534 |
+
"loss": 0.4015,
|
| 40535 |
+
"step": 57790
|
| 40536 |
+
},
|
| 40537 |
+
{
|
| 40538 |
+
"epoch": 5.789302348875645,
|
| 40539 |
+
"grad_norm": 2.6853671073913574,
|
| 40540 |
+
"learning_rate": 3.6114717880191275e-06,
|
| 40541 |
+
"loss": 0.4127,
|
| 40542 |
+
"step": 57800
|
| 40543 |
+
},
|
| 40544 |
+
{
|
| 40545 |
+
"epoch": 5.790304001602644,
|
| 40546 |
+
"grad_norm": 1.6716210842132568,
|
| 40547 |
+
"learning_rate": 3.6056473375775807e-06,
|
| 40548 |
+
"loss": 0.3998,
|
| 40549 |
+
"step": 57810
|
| 40550 |
+
},
|
| 40551 |
+
{
|
| 40552 |
+
"epoch": 5.791305654329644,
|
| 40553 |
+
"grad_norm": 2.626436471939087,
|
| 40554 |
+
"learning_rate": 3.599827222617985e-06,
|
| 40555 |
+
"loss": 0.4619,
|
| 40556 |
+
"step": 57820
|
| 40557 |
+
},
|
| 40558 |
+
{
|
| 40559 |
+
"epoch": 5.792307307056643,
|
| 40560 |
+
"grad_norm": 2.5398974418640137,
|
| 40561 |
+
"learning_rate": 3.594011444319781e-06,
|
| 40562 |
+
"loss": 0.4694,
|
| 40563 |
+
"step": 57830
|
| 40564 |
+
},
|
| 40565 |
+
{
|
| 40566 |
+
"epoch": 5.793308959783643,
|
| 40567 |
+
"grad_norm": 2.1525611877441406,
|
| 40568 |
+
"learning_rate": 3.5882000038615137e-06,
|
| 40569 |
+
"loss": 0.4092,
|
| 40570 |
+
"step": 57840
|
| 40571 |
+
},
|
| 40572 |
+
{
|
| 40573 |
+
"epoch": 5.7943106125106425,
|
| 40574 |
+
"grad_norm": 2.013880968093872,
|
| 40575 |
+
"learning_rate": 3.5823929024208306e-06,
|
| 40576 |
+
"loss": 0.3854,
|
| 40577 |
+
"step": 57850
|
| 40578 |
+
},
|
| 40579 |
+
{
|
| 40580 |
+
"epoch": 5.7953122652376425,
|
| 40581 |
+
"grad_norm": 2.4940240383148193,
|
| 40582 |
+
"learning_rate": 3.576590141174535e-06,
|
| 40583 |
+
"loss": 0.3959,
|
| 40584 |
+
"step": 57860
|
| 40585 |
+
},
|
| 40586 |
+
{
|
| 40587 |
+
"epoch": 5.796313917964642,
|
| 40588 |
+
"grad_norm": 2.325786590576172,
|
| 40589 |
+
"learning_rate": 3.5707917212985305e-06,
|
| 40590 |
+
"loss": 0.403,
|
| 40591 |
+
"step": 57870
|
| 40592 |
+
},
|
| 40593 |
+
{
|
| 40594 |
+
"epoch": 5.797315570691641,
|
| 40595 |
+
"grad_norm": 2.1471121311187744,
|
| 40596 |
+
"learning_rate": 3.5649976439678463e-06,
|
| 40597 |
+
"loss": 0.4332,
|
| 40598 |
+
"step": 57880
|
| 40599 |
+
},
|
| 40600 |
+
{
|
| 40601 |
+
"epoch": 5.798317223418641,
|
| 40602 |
+
"grad_norm": 1.913191318511963,
|
| 40603 |
+
"learning_rate": 3.559207910356613e-06,
|
| 40604 |
+
"loss": 0.412,
|
| 40605 |
+
"step": 57890
|
| 40606 |
+
},
|
| 40607 |
+
{
|
| 40608 |
+
"epoch": 5.79931887614564,
|
| 40609 |
+
"grad_norm": 1.8169552087783813,
|
| 40610 |
+
"learning_rate": 3.5534225216381195e-06,
|
| 40611 |
+
"loss": 0.3487,
|
| 40612 |
+
"step": 57900
|
| 40613 |
+
},
|
| 40614 |
+
{
|
| 40615 |
+
"epoch": 5.80032052887264,
|
| 40616 |
+
"grad_norm": 2.073249578475952,
|
| 40617 |
+
"learning_rate": 3.5476414789847394e-06,
|
| 40618 |
+
"loss": 0.3892,
|
| 40619 |
+
"step": 57910
|
| 40620 |
+
},
|
| 40621 |
+
{
|
| 40622 |
+
"epoch": 5.801322181599639,
|
| 40623 |
+
"grad_norm": 1.8279776573181152,
|
| 40624 |
+
"learning_rate": 3.5418647835679857e-06,
|
| 40625 |
+
"loss": 0.3677,
|
| 40626 |
+
"step": 57920
|
| 40627 |
+
},
|
| 40628 |
+
{
|
| 40629 |
+
"epoch": 5.802323834326639,
|
| 40630 |
+
"grad_norm": 2.387899398803711,
|
| 40631 |
+
"learning_rate": 3.5360924365584747e-06,
|
| 40632 |
+
"loss": 0.4739,
|
| 40633 |
+
"step": 57930
|
| 40634 |
+
},
|
| 40635 |
+
{
|
| 40636 |
+
"epoch": 5.803325487053638,
|
| 40637 |
+
"grad_norm": 2.970022201538086,
|
| 40638 |
+
"learning_rate": 3.5303244391259565e-06,
|
| 40639 |
+
"loss": 0.3938,
|
| 40640 |
+
"step": 57940
|
| 40641 |
+
},
|
| 40642 |
+
{
|
| 40643 |
+
"epoch": 5.804327139780638,
|
| 40644 |
+
"grad_norm": 1.9647514820098877,
|
| 40645 |
+
"learning_rate": 3.5245607924392994e-06,
|
| 40646 |
+
"loss": 0.447,
|
| 40647 |
+
"step": 57950
|
| 40648 |
+
},
|
| 40649 |
+
{
|
| 40650 |
+
"epoch": 5.805328792507638,
|
| 40651 |
+
"grad_norm": 1.8893002271652222,
|
| 40652 |
+
"learning_rate": 3.5188014976664603e-06,
|
| 40653 |
+
"loss": 0.4014,
|
| 40654 |
+
"step": 57960
|
| 40655 |
+
},
|
| 40656 |
+
{
|
| 40657 |
+
"epoch": 5.806330445234638,
|
| 40658 |
+
"grad_norm": 2.2961087226867676,
|
| 40659 |
+
"learning_rate": 3.513046555974561e-06,
|
| 40660 |
+
"loss": 0.374,
|
| 40661 |
+
"step": 57970
|
| 40662 |
+
},
|
| 40663 |
+
{
|
| 40664 |
+
"epoch": 5.807332097961637,
|
| 40665 |
+
"grad_norm": 2.0700502395629883,
|
| 40666 |
+
"learning_rate": 3.507295968529811e-06,
|
| 40667 |
+
"loss": 0.4457,
|
| 40668 |
+
"step": 57980
|
| 40669 |
+
},
|
| 40670 |
+
{
|
| 40671 |
+
"epoch": 5.808333750688636,
|
| 40672 |
+
"grad_norm": 2.2028748989105225,
|
| 40673 |
+
"learning_rate": 3.501549736497542e-06,
|
| 40674 |
+
"loss": 0.4251,
|
| 40675 |
+
"step": 57990
|
| 40676 |
+
},
|
| 40677 |
+
{
|
| 40678 |
+
"epoch": 5.809335403415636,
|
| 40679 |
+
"grad_norm": 2.2810099124908447,
|
| 40680 |
+
"learning_rate": 3.4958078610422077e-06,
|
| 40681 |
+
"loss": 0.4206,
|
| 40682 |
+
"step": 58000
|
| 40683 |
+
},
|
| 40684 |
+
{
|
| 40685 |
+
"epoch": 5.810337056142635,
|
| 40686 |
+
"grad_norm": 2.1720199584960938,
|
| 40687 |
+
"learning_rate": 3.4900703433273756e-06,
|
| 40688 |
+
"loss": 0.4189,
|
| 40689 |
+
"step": 58010
|
| 40690 |
+
},
|
| 40691 |
+
{
|
| 40692 |
+
"epoch": 5.811338708869635,
|
| 40693 |
+
"grad_norm": 3.048063278198242,
|
| 40694 |
+
"learning_rate": 3.4843371845157347e-06,
|
| 40695 |
+
"loss": 0.4415,
|
| 40696 |
+
"step": 58020
|
| 40697 |
+
},
|
| 40698 |
+
{
|
| 40699 |
+
"epoch": 5.812340361596634,
|
| 40700 |
+
"grad_norm": 2.880526542663574,
|
| 40701 |
+
"learning_rate": 3.4786083857690804e-06,
|
| 40702 |
+
"loss": 0.3805,
|
| 40703 |
+
"step": 58030
|
| 40704 |
+
},
|
| 40705 |
+
{
|
| 40706 |
+
"epoch": 5.813342014323634,
|
| 40707 |
+
"grad_norm": 2.9283924102783203,
|
| 40708 |
+
"learning_rate": 3.4728839482483374e-06,
|
| 40709 |
+
"loss": 0.4197,
|
| 40710 |
+
"step": 58040
|
| 40711 |
+
},
|
| 40712 |
+
{
|
| 40713 |
+
"epoch": 5.814343667050633,
|
| 40714 |
+
"grad_norm": 2.497363805770874,
|
| 40715 |
+
"learning_rate": 3.467163873113535e-06,
|
| 40716 |
+
"loss": 0.419,
|
| 40717 |
+
"step": 58050
|
| 40718 |
+
},
|
| 40719 |
+
{
|
| 40720 |
+
"epoch": 5.8153453197776335,
|
| 40721 |
+
"grad_norm": 2.2618627548217773,
|
| 40722 |
+
"learning_rate": 3.4614481615238307e-06,
|
| 40723 |
+
"loss": 0.4099,
|
| 40724 |
+
"step": 58060
|
| 40725 |
+
},
|
| 40726 |
+
{
|
| 40727 |
+
"epoch": 5.816346972504633,
|
| 40728 |
+
"grad_norm": 2.3808374404907227,
|
| 40729 |
+
"learning_rate": 3.455736814637489e-06,
|
| 40730 |
+
"loss": 0.4432,
|
| 40731 |
+
"step": 58070
|
| 40732 |
+
},
|
| 40733 |
+
{
|
| 40734 |
+
"epoch": 5.817348625231633,
|
| 40735 |
+
"grad_norm": 2.3720083236694336,
|
| 40736 |
+
"learning_rate": 3.4500298336118895e-06,
|
| 40737 |
+
"loss": 0.3802,
|
| 40738 |
+
"step": 58080
|
| 40739 |
+
},
|
| 40740 |
+
{
|
| 40741 |
+
"epoch": 5.818350277958632,
|
| 40742 |
+
"grad_norm": 2.0641584396362305,
|
| 40743 |
+
"learning_rate": 3.44432721960353e-06,
|
| 40744 |
+
"loss": 0.4029,
|
| 40745 |
+
"step": 58090
|
| 40746 |
+
},
|
| 40747 |
+
{
|
| 40748 |
+
"epoch": 5.819351930685631,
|
| 40749 |
+
"grad_norm": 2.339599370956421,
|
| 40750 |
+
"learning_rate": 3.4386289737680238e-06,
|
| 40751 |
+
"loss": 0.3414,
|
| 40752 |
+
"step": 58100
|
| 40753 |
+
},
|
| 40754 |
+
{
|
| 40755 |
+
"epoch": 5.820353583412631,
|
| 40756 |
+
"grad_norm": 2.4418370723724365,
|
| 40757 |
+
"learning_rate": 3.432935097260101e-06,
|
| 40758 |
+
"loss": 0.4355,
|
| 40759 |
+
"step": 58110
|
| 40760 |
+
},
|
| 40761 |
+
{
|
| 40762 |
+
"epoch": 5.82135523613963,
|
| 40763 |
+
"grad_norm": 2.2036075592041016,
|
| 40764 |
+
"learning_rate": 3.4272455912335993e-06,
|
| 40765 |
+
"loss": 0.3723,
|
| 40766 |
+
"step": 58120
|
| 40767 |
+
},
|
| 40768 |
+
{
|
| 40769 |
+
"epoch": 5.82235688886663,
|
| 40770 |
+
"grad_norm": 2.0506880283355713,
|
| 40771 |
+
"learning_rate": 3.4215604568414744e-06,
|
| 40772 |
+
"loss": 0.3452,
|
| 40773 |
+
"step": 58130
|
| 40774 |
+
},
|
| 40775 |
+
{
|
| 40776 |
+
"epoch": 5.823358541593629,
|
| 40777 |
+
"grad_norm": 2.293076992034912,
|
| 40778 |
+
"learning_rate": 3.415879695235802e-06,
|
| 40779 |
+
"loss": 0.4473,
|
| 40780 |
+
"step": 58140
|
| 40781 |
+
},
|
| 40782 |
+
{
|
| 40783 |
+
"epoch": 5.824360194320629,
|
| 40784 |
+
"grad_norm": 2.3374907970428467,
|
| 40785 |
+
"learning_rate": 3.410203307567761e-06,
|
| 40786 |
+
"loss": 0.44,
|
| 40787 |
+
"step": 58150
|
| 40788 |
+
},
|
| 40789 |
+
{
|
| 40790 |
+
"epoch": 5.8253618470476285,
|
| 40791 |
+
"grad_norm": 2.3096864223480225,
|
| 40792 |
+
"learning_rate": 3.404531294987648e-06,
|
| 40793 |
+
"loss": 0.4203,
|
| 40794 |
+
"step": 58160
|
| 40795 |
+
},
|
| 40796 |
+
{
|
| 40797 |
+
"epoch": 5.826363499774628,
|
| 40798 |
+
"grad_norm": 2.263195276260376,
|
| 40799 |
+
"learning_rate": 3.3988636586448825e-06,
|
| 40800 |
+
"loss": 0.4437,
|
| 40801 |
+
"step": 58170
|
| 40802 |
+
},
|
| 40803 |
+
{
|
| 40804 |
+
"epoch": 5.827365152501628,
|
| 40805 |
+
"grad_norm": 2.6008780002593994,
|
| 40806 |
+
"learning_rate": 3.393200399687979e-06,
|
| 40807 |
+
"loss": 0.3862,
|
| 40808 |
+
"step": 58180
|
| 40809 |
+
},
|
| 40810 |
+
{
|
| 40811 |
+
"epoch": 5.828366805228628,
|
| 40812 |
+
"grad_norm": 2.4163975715637207,
|
| 40813 |
+
"learning_rate": 3.387541519264581e-06,
|
| 40814 |
+
"loss": 0.4478,
|
| 40815 |
+
"step": 58190
|
| 40816 |
+
},
|
| 40817 |
+
{
|
| 40818 |
+
"epoch": 5.829368457955627,
|
| 40819 |
+
"grad_norm": 1.9434057474136353,
|
| 40820 |
+
"learning_rate": 3.3818870185214376e-06,
|
| 40821 |
+
"loss": 0.389,
|
| 40822 |
+
"step": 58200
|
| 40823 |
+
},
|
| 40824 |
+
{
|
| 40825 |
+
"epoch": 5.830370110682626,
|
| 40826 |
+
"grad_norm": 2.101961135864258,
|
| 40827 |
+
"learning_rate": 3.3762368986044094e-06,
|
| 40828 |
+
"loss": 0.3613,
|
| 40829 |
+
"step": 58210
|
| 40830 |
+
},
|
| 40831 |
+
{
|
| 40832 |
+
"epoch": 5.831371763409626,
|
| 40833 |
+
"grad_norm": 2.6012158393859863,
|
| 40834 |
+
"learning_rate": 3.3705911606584733e-06,
|
| 40835 |
+
"loss": 0.4022,
|
| 40836 |
+
"step": 58220
|
| 40837 |
+
},
|
| 40838 |
+
{
|
| 40839 |
+
"epoch": 5.832373416136625,
|
| 40840 |
+
"grad_norm": 2.9583868980407715,
|
| 40841 |
+
"learning_rate": 3.3649498058277078e-06,
|
| 40842 |
+
"loss": 0.3645,
|
| 40843 |
+
"step": 58230
|
| 40844 |
+
},
|
| 40845 |
+
{
|
| 40846 |
+
"epoch": 5.833375068863625,
|
| 40847 |
+
"grad_norm": 2.1901628971099854,
|
| 40848 |
+
"learning_rate": 3.3593128352553294e-06,
|
| 40849 |
+
"loss": 0.3959,
|
| 40850 |
+
"step": 58240
|
| 40851 |
+
},
|
| 40852 |
+
{
|
| 40853 |
+
"epoch": 5.834376721590624,
|
| 40854 |
+
"grad_norm": 2.3684897422790527,
|
| 40855 |
+
"learning_rate": 3.353680250083632e-06,
|
| 40856 |
+
"loss": 0.434,
|
| 40857 |
+
"step": 58250
|
| 40858 |
+
},
|
| 40859 |
+
{
|
| 40860 |
+
"epoch": 5.835378374317624,
|
| 40861 |
+
"grad_norm": 2.165945291519165,
|
| 40862 |
+
"learning_rate": 3.3480520514540427e-06,
|
| 40863 |
+
"loss": 0.4129,
|
| 40864 |
+
"step": 58260
|
| 40865 |
+
},
|
| 40866 |
+
{
|
| 40867 |
+
"epoch": 5.8363800270446236,
|
| 40868 |
+
"grad_norm": 2.228867292404175,
|
| 40869 |
+
"learning_rate": 3.342428240507095e-06,
|
| 40870 |
+
"loss": 0.3903,
|
| 40871 |
+
"step": 58270
|
| 40872 |
+
},
|
| 40873 |
+
{
|
| 40874 |
+
"epoch": 5.837381679771623,
|
| 40875 |
+
"grad_norm": 1.7761813402175903,
|
| 40876 |
+
"learning_rate": 3.336808818382428e-06,
|
| 40877 |
+
"loss": 0.41,
|
| 40878 |
+
"step": 58280
|
| 40879 |
+
},
|
| 40880 |
+
{
|
| 40881 |
+
"epoch": 5.838383332498623,
|
| 40882 |
+
"grad_norm": 2.542438507080078,
|
| 40883 |
+
"learning_rate": 3.3311937862188e-06,
|
| 40884 |
+
"loss": 0.3999,
|
| 40885 |
+
"step": 58290
|
| 40886 |
+
},
|
| 40887 |
+
{
|
| 40888 |
+
"epoch": 5.839384985225623,
|
| 40889 |
+
"grad_norm": 2.026426076889038,
|
| 40890 |
+
"learning_rate": 3.3255831451540682e-06,
|
| 40891 |
+
"loss": 0.411,
|
| 40892 |
+
"step": 58300
|
| 40893 |
+
},
|
| 40894 |
+
{
|
| 40895 |
+
"epoch": 5.840386637952622,
|
| 40896 |
+
"grad_norm": 2.4971375465393066,
|
| 40897 |
+
"learning_rate": 3.319976896325222e-06,
|
| 40898 |
+
"loss": 0.4619,
|
| 40899 |
+
"step": 58310
|
| 40900 |
+
},
|
| 40901 |
+
{
|
| 40902 |
+
"epoch": 5.841388290679621,
|
| 40903 |
+
"grad_norm": 1.815487027168274,
|
| 40904 |
+
"learning_rate": 3.3143750408683312e-06,
|
| 40905 |
+
"loss": 0.393,
|
| 40906 |
+
"step": 58320
|
| 40907 |
+
},
|
| 40908 |
+
{
|
| 40909 |
+
"epoch": 5.842389943406621,
|
| 40910 |
+
"grad_norm": 1.9683047533035278,
|
| 40911 |
+
"learning_rate": 3.3087775799185954e-06,
|
| 40912 |
+
"loss": 0.3709,
|
| 40913 |
+
"step": 58330
|
| 40914 |
+
},
|
| 40915 |
+
{
|
| 40916 |
+
"epoch": 5.84339159613362,
|
| 40917 |
+
"grad_norm": 2.5225014686584473,
|
| 40918 |
+
"learning_rate": 3.303184514610319e-06,
|
| 40919 |
+
"loss": 0.4277,
|
| 40920 |
+
"step": 58340
|
| 40921 |
+
},
|
| 40922 |
+
{
|
| 40923 |
+
"epoch": 5.84439324886062,
|
| 40924 |
+
"grad_norm": 2.2868824005126953,
|
| 40925 |
+
"learning_rate": 3.297595846076912e-06,
|
| 40926 |
+
"loss": 0.3846,
|
| 40927 |
+
"step": 58350
|
| 40928 |
+
},
|
| 40929 |
+
{
|
| 40930 |
+
"epoch": 5.845394901587619,
|
| 40931 |
+
"grad_norm": 2.6198313236236572,
|
| 40932 |
+
"learning_rate": 3.2920115754508982e-06,
|
| 40933 |
+
"loss": 0.3833,
|
| 40934 |
+
"step": 58360
|
| 40935 |
+
},
|
| 40936 |
+
{
|
| 40937 |
+
"epoch": 5.8463965543146195,
|
| 40938 |
+
"grad_norm": 2.4089162349700928,
|
| 40939 |
+
"learning_rate": 3.286431703863904e-06,
|
| 40940 |
+
"loss": 0.4173,
|
| 40941 |
+
"step": 58370
|
| 40942 |
+
},
|
| 40943 |
+
{
|
| 40944 |
+
"epoch": 5.847398207041619,
|
| 40945 |
+
"grad_norm": 2.791187047958374,
|
| 40946 |
+
"learning_rate": 3.280856232446683e-06,
|
| 40947 |
+
"loss": 0.4522,
|
| 40948 |
+
"step": 58380
|
| 40949 |
+
},
|
| 40950 |
+
{
|
| 40951 |
+
"epoch": 5.848399859768618,
|
| 40952 |
+
"grad_norm": 2.8218894004821777,
|
| 40953 |
+
"learning_rate": 3.275285162329067e-06,
|
| 40954 |
+
"loss": 0.4694,
|
| 40955 |
+
"step": 58390
|
| 40956 |
+
},
|
| 40957 |
+
{
|
| 40958 |
+
"epoch": 5.849401512495618,
|
| 40959 |
+
"grad_norm": 2.2173585891723633,
|
| 40960 |
+
"learning_rate": 3.2697184946400145e-06,
|
| 40961 |
+
"loss": 0.4933,
|
| 40962 |
+
"step": 58400
|
| 40963 |
+
},
|
| 40964 |
+
{
|
| 40965 |
+
"epoch": 5.850403165222617,
|
| 40966 |
+
"grad_norm": 2.342435836791992,
|
| 40967 |
+
"learning_rate": 3.264156230507587e-06,
|
| 40968 |
+
"loss": 0.4235,
|
| 40969 |
+
"step": 58410
|
| 40970 |
+
},
|
| 40971 |
+
{
|
| 40972 |
+
"epoch": 5.851404817949617,
|
| 40973 |
+
"grad_norm": 2.042520761489868,
|
| 40974 |
+
"learning_rate": 3.2585983710589714e-06,
|
| 40975 |
+
"loss": 0.3669,
|
| 40976 |
+
"step": 58420
|
| 40977 |
+
},
|
| 40978 |
+
{
|
| 40979 |
+
"epoch": 5.852406470676616,
|
| 40980 |
+
"grad_norm": 3.4217960834503174,
|
| 40981 |
+
"learning_rate": 3.253044917420431e-06,
|
| 40982 |
+
"loss": 0.4388,
|
| 40983 |
+
"step": 58430
|
| 40984 |
+
},
|
| 40985 |
+
{
|
| 40986 |
+
"epoch": 5.853408123403616,
|
| 40987 |
+
"grad_norm": 2.0031356811523438,
|
| 40988 |
+
"learning_rate": 3.2474958707173457e-06,
|
| 40989 |
+
"loss": 0.4051,
|
| 40990 |
+
"step": 58440
|
| 40991 |
+
},
|
| 40992 |
+
{
|
| 40993 |
+
"epoch": 5.854409776130615,
|
| 40994 |
+
"grad_norm": 2.0639429092407227,
|
| 40995 |
+
"learning_rate": 3.2419512320742307e-06,
|
| 40996 |
+
"loss": 0.3602,
|
| 40997 |
+
"step": 58450
|
| 40998 |
+
},
|
| 40999 |
+
{
|
| 41000 |
+
"epoch": 5.855411428857615,
|
| 41001 |
+
"grad_norm": 2.1385297775268555,
|
| 41002 |
+
"learning_rate": 3.236411002614667e-06,
|
| 41003 |
+
"loss": 0.3845,
|
| 41004 |
+
"step": 58460
|
| 41005 |
+
},
|
| 41006 |
+
{
|
| 41007 |
+
"epoch": 5.8564130815846145,
|
| 41008 |
+
"grad_norm": 2.301572799682617,
|
| 41009 |
+
"learning_rate": 3.2308751834613654e-06,
|
| 41010 |
+
"loss": 0.3701,
|
| 41011 |
+
"step": 58470
|
| 41012 |
+
},
|
| 41013 |
+
{
|
| 41014 |
+
"epoch": 5.8574147343116145,
|
| 41015 |
+
"grad_norm": 2.5921859741210938,
|
| 41016 |
+
"learning_rate": 3.225343775736131e-06,
|
| 41017 |
+
"loss": 0.4385,
|
| 41018 |
+
"step": 58480
|
| 41019 |
+
},
|
| 41020 |
+
{
|
| 41021 |
+
"epoch": 5.858416387038614,
|
| 41022 |
+
"grad_norm": 1.7083300352096558,
|
| 41023 |
+
"learning_rate": 3.219816780559903e-06,
|
| 41024 |
+
"loss": 0.4079,
|
| 41025 |
+
"step": 58490
|
| 41026 |
+
},
|
| 41027 |
+
{
|
| 41028 |
+
"epoch": 5.859418039765613,
|
| 41029 |
+
"grad_norm": 2.4190192222595215,
|
| 41030 |
+
"learning_rate": 3.214294199052684e-06,
|
| 41031 |
+
"loss": 0.4171,
|
| 41032 |
+
"step": 58500
|
| 41033 |
+
},
|
| 41034 |
+
{
|
| 41035 |
+
"epoch": 5.860419692492613,
|
| 41036 |
+
"grad_norm": 2.123609781265259,
|
| 41037 |
+
"learning_rate": 3.2087760323336073e-06,
|
| 41038 |
+
"loss": 0.4141,
|
| 41039 |
+
"step": 58510
|
| 41040 |
+
},
|
| 41041 |
+
{
|
| 41042 |
+
"epoch": 5.861421345219612,
|
| 41043 |
+
"grad_norm": 2.7376978397369385,
|
| 41044 |
+
"learning_rate": 3.203262281520916e-06,
|
| 41045 |
+
"loss": 0.4035,
|
| 41046 |
+
"step": 58520
|
| 41047 |
+
},
|
| 41048 |
+
{
|
| 41049 |
+
"epoch": 5.862422997946612,
|
| 41050 |
+
"grad_norm": 2.1443138122558594,
|
| 41051 |
+
"learning_rate": 3.1977529477319523e-06,
|
| 41052 |
+
"loss": 0.4533,
|
| 41053 |
+
"step": 58530
|
| 41054 |
+
},
|
| 41055 |
+
{
|
| 41056 |
+
"epoch": 5.863424650673611,
|
| 41057 |
+
"grad_norm": 2.4468650817871094,
|
| 41058 |
+
"learning_rate": 3.1922480320831476e-06,
|
| 41059 |
+
"loss": 0.4372,
|
| 41060 |
+
"step": 58540
|
| 41061 |
+
},
|
| 41062 |
+
{
|
| 41063 |
+
"epoch": 5.864426303400611,
|
| 41064 |
+
"grad_norm": 1.9587702751159668,
|
| 41065 |
+
"learning_rate": 3.1867475356900545e-06,
|
| 41066 |
+
"loss": 0.381,
|
| 41067 |
+
"step": 58550
|
| 41068 |
+
},
|
| 41069 |
+
{
|
| 41070 |
+
"epoch": 5.86542795612761,
|
| 41071 |
+
"grad_norm": 1.8554461002349854,
|
| 41072 |
+
"learning_rate": 3.1812514596673393e-06,
|
| 41073 |
+
"loss": 0.4171,
|
| 41074 |
+
"step": 58560
|
| 41075 |
+
},
|
| 41076 |
+
{
|
| 41077 |
+
"epoch": 5.86642960885461,
|
| 41078 |
+
"grad_norm": 2.2810850143432617,
|
| 41079 |
+
"learning_rate": 3.17575980512875e-06,
|
| 41080 |
+
"loss": 0.4555,
|
| 41081 |
+
"step": 58570
|
| 41082 |
+
},
|
| 41083 |
+
{
|
| 41084 |
+
"epoch": 5.8674312615816095,
|
| 41085 |
+
"grad_norm": 2.07438063621521,
|
| 41086 |
+
"learning_rate": 3.1702725731871467e-06,
|
| 41087 |
+
"loss": 0.4419,
|
| 41088 |
+
"step": 58580
|
| 41089 |
+
},
|
| 41090 |
+
{
|
| 41091 |
+
"epoch": 5.86843291430861,
|
| 41092 |
+
"grad_norm": 2.6593003273010254,
|
| 41093 |
+
"learning_rate": 3.164789764954504e-06,
|
| 41094 |
+
"loss": 0.3928,
|
| 41095 |
+
"step": 58590
|
| 41096 |
+
},
|
| 41097 |
+
{
|
| 41098 |
+
"epoch": 5.869434567035609,
|
| 41099 |
+
"grad_norm": 2.4539880752563477,
|
| 41100 |
+
"learning_rate": 3.1593113815418907e-06,
|
| 41101 |
+
"loss": 0.4347,
|
| 41102 |
+
"step": 58600
|
| 41103 |
+
},
|
| 41104 |
+
{
|
| 41105 |
+
"epoch": 5.870436219762608,
|
| 41106 |
+
"grad_norm": 1.5071160793304443,
|
| 41107 |
+
"learning_rate": 3.153837424059475e-06,
|
| 41108 |
+
"loss": 0.3731,
|
| 41109 |
+
"step": 58610
|
| 41110 |
+
},
|
| 41111 |
+
{
|
| 41112 |
+
"epoch": 5.871437872489608,
|
| 41113 |
+
"grad_norm": 2.6640098094940186,
|
| 41114 |
+
"learning_rate": 3.1483678936165274e-06,
|
| 41115 |
+
"loss": 0.3895,
|
| 41116 |
+
"step": 58620
|
| 41117 |
+
},
|
| 41118 |
+
{
|
| 41119 |
+
"epoch": 5.872439525216607,
|
| 41120 |
+
"grad_norm": 2.0711734294891357,
|
| 41121 |
+
"learning_rate": 3.1429027913214393e-06,
|
| 41122 |
+
"loss": 0.365,
|
| 41123 |
+
"step": 58630
|
| 41124 |
+
},
|
| 41125 |
+
{
|
| 41126 |
+
"epoch": 5.873441177943607,
|
| 41127 |
+
"grad_norm": 2.2734105587005615,
|
| 41128 |
+
"learning_rate": 3.1374421182816943e-06,
|
| 41129 |
+
"loss": 0.4336,
|
| 41130 |
+
"step": 58640
|
| 41131 |
+
},
|
| 41132 |
+
{
|
| 41133 |
+
"epoch": 5.874442830670606,
|
| 41134 |
+
"grad_norm": 1.955325961112976,
|
| 41135 |
+
"learning_rate": 3.1319858756038574e-06,
|
| 41136 |
+
"loss": 0.3711,
|
| 41137 |
+
"step": 58650
|
| 41138 |
+
},
|
| 41139 |
+
{
|
| 41140 |
+
"epoch": 5.875444483397606,
|
| 41141 |
+
"grad_norm": 1.9686741828918457,
|
| 41142 |
+
"learning_rate": 3.1265340643936327e-06,
|
| 41143 |
+
"loss": 0.3821,
|
| 41144 |
+
"step": 58660
|
| 41145 |
+
},
|
| 41146 |
+
{
|
| 41147 |
+
"epoch": 5.876446136124605,
|
| 41148 |
+
"grad_norm": 2.02400279045105,
|
| 41149 |
+
"learning_rate": 3.1210866857558057e-06,
|
| 41150 |
+
"loss": 0.4162,
|
| 41151 |
+
"step": 58670
|
| 41152 |
+
},
|
| 41153 |
+
{
|
| 41154 |
+
"epoch": 5.8774477888516055,
|
| 41155 |
+
"grad_norm": 1.770056962966919,
|
| 41156 |
+
"learning_rate": 3.115643740794261e-06,
|
| 41157 |
+
"loss": 0.3925,
|
| 41158 |
+
"step": 58680
|
| 41159 |
+
},
|
| 41160 |
+
{
|
| 41161 |
+
"epoch": 5.878449441578605,
|
| 41162 |
+
"grad_norm": 2.5358870029449463,
|
| 41163 |
+
"learning_rate": 3.1102052306119843e-06,
|
| 41164 |
+
"loss": 0.4277,
|
| 41165 |
+
"step": 58690
|
| 41166 |
+
},
|
| 41167 |
+
{
|
| 41168 |
+
"epoch": 5.879451094305605,
|
| 41169 |
+
"grad_norm": 2.007375478744507,
|
| 41170 |
+
"learning_rate": 3.1047711563110816e-06,
|
| 41171 |
+
"loss": 0.4078,
|
| 41172 |
+
"step": 58700
|
| 41173 |
+
},
|
| 41174 |
+
{
|
| 41175 |
+
"epoch": 5.880452747032604,
|
| 41176 |
+
"grad_norm": 1.9282039403915405,
|
| 41177 |
+
"learning_rate": 3.0993415189927486e-06,
|
| 41178 |
+
"loss": 0.4213,
|
| 41179 |
+
"step": 58710
|
| 41180 |
+
},
|
| 41181 |
+
{
|
| 41182 |
+
"epoch": 5.881454399759603,
|
| 41183 |
+
"grad_norm": 2.519036293029785,
|
| 41184 |
+
"learning_rate": 3.093916319757262e-06,
|
| 41185 |
+
"loss": 0.4172,
|
| 41186 |
+
"step": 58720
|
| 41187 |
+
},
|
| 41188 |
+
{
|
| 41189 |
+
"epoch": 5.882456052486603,
|
| 41190 |
+
"grad_norm": 2.8909900188446045,
|
| 41191 |
+
"learning_rate": 3.0884955597040354e-06,
|
| 41192 |
+
"loss": 0.3786,
|
| 41193 |
+
"step": 58730
|
| 41194 |
+
},
|
| 41195 |
+
{
|
| 41196 |
+
"epoch": 5.883457705213602,
|
| 41197 |
+
"grad_norm": 2.3090453147888184,
|
| 41198 |
+
"learning_rate": 3.083079239931555e-06,
|
| 41199 |
+
"loss": 0.3737,
|
| 41200 |
+
"step": 58740
|
| 41201 |
+
},
|
| 41202 |
+
{
|
| 41203 |
+
"epoch": 5.884459357940602,
|
| 41204 |
+
"grad_norm": 2.138934850692749,
|
| 41205 |
+
"learning_rate": 3.0776673615374257e-06,
|
| 41206 |
+
"loss": 0.4279,
|
| 41207 |
+
"step": 58750
|
| 41208 |
+
},
|
| 41209 |
+
{
|
| 41210 |
+
"epoch": 5.885461010667601,
|
| 41211 |
+
"grad_norm": 2.2369160652160645,
|
| 41212 |
+
"learning_rate": 3.072259925618326e-06,
|
| 41213 |
+
"loss": 0.4339,
|
| 41214 |
+
"step": 58760
|
| 41215 |
+
},
|
| 41216 |
+
{
|
| 41217 |
+
"epoch": 5.886462663394601,
|
| 41218 |
+
"grad_norm": 2.442608594894409,
|
| 41219 |
+
"learning_rate": 3.0668569332700707e-06,
|
| 41220 |
+
"loss": 0.4479,
|
| 41221 |
+
"step": 58770
|
| 41222 |
+
},
|
| 41223 |
+
{
|
| 41224 |
+
"epoch": 5.8874643161216005,
|
| 41225 |
+
"grad_norm": 3.054950475692749,
|
| 41226 |
+
"learning_rate": 3.061458385587551e-06,
|
| 41227 |
+
"loss": 0.3798,
|
| 41228 |
+
"step": 58780
|
| 41229 |
+
},
|
| 41230 |
+
{
|
| 41231 |
+
"epoch": 5.8884659688486005,
|
| 41232 |
+
"grad_norm": 3.386465311050415,
|
| 41233 |
+
"learning_rate": 3.056064283664747e-06,
|
| 41234 |
+
"loss": 0.4276,
|
| 41235 |
+
"step": 58790
|
| 41236 |
+
},
|
| 41237 |
+
{
|
| 41238 |
+
"epoch": 5.8894676215756,
|
| 41239 |
+
"grad_norm": 2.4675679206848145,
|
| 41240 |
+
"learning_rate": 3.0506746285947685e-06,
|
| 41241 |
+
"loss": 0.4091,
|
| 41242 |
+
"step": 58800
|
| 41243 |
+
},
|
| 41244 |
+
{
|
| 41245 |
+
"epoch": 5.8904692743026,
|
| 41246 |
+
"grad_norm": 2.3524179458618164,
|
| 41247 |
+
"learning_rate": 3.0452894214698046e-06,
|
| 41248 |
+
"loss": 0.3855,
|
| 41249 |
+
"step": 58810
|
| 41250 |
+
},
|
| 41251 |
+
{
|
| 41252 |
+
"epoch": 5.891470927029599,
|
| 41253 |
+
"grad_norm": 2.412656307220459,
|
| 41254 |
+
"learning_rate": 3.039908663381141e-06,
|
| 41255 |
+
"loss": 0.4196,
|
| 41256 |
+
"step": 58820
|
| 41257 |
+
},
|
| 41258 |
+
{
|
| 41259 |
+
"epoch": 5.892472579756598,
|
| 41260 |
+
"grad_norm": 2.3410556316375732,
|
| 41261 |
+
"learning_rate": 3.034532355419173e-06,
|
| 41262 |
+
"loss": 0.3997,
|
| 41263 |
+
"step": 58830
|
| 41264 |
+
},
|
| 41265 |
+
{
|
| 41266 |
+
"epoch": 5.893474232483598,
|
| 41267 |
+
"grad_norm": 1.8773866891860962,
|
| 41268 |
+
"learning_rate": 3.0291604986733863e-06,
|
| 41269 |
+
"loss": 0.3977,
|
| 41270 |
+
"step": 58840
|
| 41271 |
+
},
|
| 41272 |
+
{
|
| 41273 |
+
"epoch": 5.894475885210597,
|
| 41274 |
+
"grad_norm": 3.0513360500335693,
|
| 41275 |
+
"learning_rate": 3.0237930942323715e-06,
|
| 41276 |
+
"loss": 0.4122,
|
| 41277 |
+
"step": 58850
|
| 41278 |
+
},
|
| 41279 |
+
{
|
| 41280 |
+
"epoch": 5.895477537937597,
|
| 41281 |
+
"grad_norm": 2.220612049102783,
|
| 41282 |
+
"learning_rate": 3.018430143183798e-06,
|
| 41283 |
+
"loss": 0.4286,
|
| 41284 |
+
"step": 58860
|
| 41285 |
+
},
|
| 41286 |
+
{
|
| 41287 |
+
"epoch": 5.896479190664596,
|
| 41288 |
+
"grad_norm": 2.1296792030334473,
|
| 41289 |
+
"learning_rate": 3.01307164661446e-06,
|
| 41290 |
+
"loss": 0.3608,
|
| 41291 |
+
"step": 58870
|
| 41292 |
+
},
|
| 41293 |
+
{
|
| 41294 |
+
"epoch": 5.897480843391596,
|
| 41295 |
+
"grad_norm": 2.085975408554077,
|
| 41296 |
+
"learning_rate": 3.007717605610233e-06,
|
| 41297 |
+
"loss": 0.4108,
|
| 41298 |
+
"step": 58880
|
| 41299 |
+
},
|
| 41300 |
+
{
|
| 41301 |
+
"epoch": 5.8984824961185955,
|
| 41302 |
+
"grad_norm": 2.063328266143799,
|
| 41303 |
+
"learning_rate": 3.0023680212560913e-06,
|
| 41304 |
+
"loss": 0.4211,
|
| 41305 |
+
"step": 58890
|
| 41306 |
+
},
|
| 41307 |
+
{
|
| 41308 |
+
"epoch": 5.899484148845596,
|
| 41309 |
+
"grad_norm": 2.500905990600586,
|
| 41310 |
+
"learning_rate": 2.997022894636106e-06,
|
| 41311 |
+
"loss": 0.4119,
|
| 41312 |
+
"step": 58900
|
| 41313 |
+
},
|
| 41314 |
+
{
|
| 41315 |
+
"epoch": 5.900485801572595,
|
| 41316 |
+
"grad_norm": 2.7382659912109375,
|
| 41317 |
+
"learning_rate": 2.9916822268334505e-06,
|
| 41318 |
+
"loss": 0.4432,
|
| 41319 |
+
"step": 58910
|
| 41320 |
+
},
|
| 41321 |
+
{
|
| 41322 |
+
"epoch": 5.901487454299595,
|
| 41323 |
+
"grad_norm": 2.6298553943634033,
|
| 41324 |
+
"learning_rate": 2.986346018930386e-06,
|
| 41325 |
+
"loss": 0.3853,
|
| 41326 |
+
"step": 58920
|
| 41327 |
+
},
|
| 41328 |
+
{
|
| 41329 |
+
"epoch": 5.902489107026594,
|
| 41330 |
+
"grad_norm": 1.8519641160964966,
|
| 41331 |
+
"learning_rate": 2.9810142720082755e-06,
|
| 41332 |
+
"loss": 0.4176,
|
| 41333 |
+
"step": 58930
|
| 41334 |
+
},
|
| 41335 |
+
{
|
| 41336 |
+
"epoch": 5.903490759753593,
|
| 41337 |
+
"grad_norm": 1.7589643001556396,
|
| 41338 |
+
"learning_rate": 2.9756869871475773e-06,
|
| 41339 |
+
"loss": 0.3655,
|
| 41340 |
+
"step": 58940
|
| 41341 |
+
},
|
| 41342 |
+
{
|
| 41343 |
+
"epoch": 5.904492412480593,
|
| 41344 |
+
"grad_norm": 2.5523855686187744,
|
| 41345 |
+
"learning_rate": 2.9703641654278443e-06,
|
| 41346 |
+
"loss": 0.4341,
|
| 41347 |
+
"step": 58950
|
| 41348 |
+
},
|
| 41349 |
+
{
|
| 41350 |
+
"epoch": 5.905494065207592,
|
| 41351 |
+
"grad_norm": 2.166921615600586,
|
| 41352 |
+
"learning_rate": 2.9650458079277245e-06,
|
| 41353 |
+
"loss": 0.4539,
|
| 41354 |
+
"step": 58960
|
| 41355 |
+
},
|
| 41356 |
+
{
|
| 41357 |
+
"epoch": 5.906495717934592,
|
| 41358 |
+
"grad_norm": 3.721738338470459,
|
| 41359 |
+
"learning_rate": 2.959731915724964e-06,
|
| 41360 |
+
"loss": 0.3579,
|
| 41361 |
+
"step": 58970
|
| 41362 |
+
},
|
| 41363 |
+
{
|
| 41364 |
+
"epoch": 5.907497370661591,
|
| 41365 |
+
"grad_norm": 2.3052477836608887,
|
| 41366 |
+
"learning_rate": 2.9544224898964025e-06,
|
| 41367 |
+
"loss": 0.4763,
|
| 41368 |
+
"step": 58980
|
| 41369 |
+
},
|
| 41370 |
+
{
|
| 41371 |
+
"epoch": 5.9084990233885915,
|
| 41372 |
+
"grad_norm": 2.258401870727539,
|
| 41373 |
+
"learning_rate": 2.9491175315179725e-06,
|
| 41374 |
+
"loss": 0.3745,
|
| 41375 |
+
"step": 58990
|
| 41376 |
+
},
|
| 41377 |
+
{
|
| 41378 |
+
"epoch": 5.909500676115591,
|
| 41379 |
+
"grad_norm": 2.1679635047912598,
|
| 41380 |
+
"learning_rate": 2.9438170416647036e-06,
|
| 41381 |
+
"loss": 0.4112,
|
| 41382 |
+
"step": 59000
|
| 41383 |
+
},
|
| 41384 |
+
{
|
| 41385 |
+
"epoch": 5.910502328842591,
|
| 41386 |
+
"grad_norm": 2.070258140563965,
|
| 41387 |
+
"learning_rate": 2.93852102141072e-06,
|
| 41388 |
+
"loss": 0.4005,
|
| 41389 |
+
"step": 59010
|
| 41390 |
+
},
|
| 41391 |
+
{
|
| 41392 |
+
"epoch": 5.91150398156959,
|
| 41393 |
+
"grad_norm": 2.071631669998169,
|
| 41394 |
+
"learning_rate": 2.9332294718292406e-06,
|
| 41395 |
+
"loss": 0.4322,
|
| 41396 |
+
"step": 59020
|
| 41397 |
+
},
|
| 41398 |
+
{
|
| 41399 |
+
"epoch": 5.91250563429659,
|
| 41400 |
+
"grad_norm": 1.5518900156021118,
|
| 41401 |
+
"learning_rate": 2.9279423939925747e-06,
|
| 41402 |
+
"loss": 0.4022,
|
| 41403 |
+
"step": 59030
|
| 41404 |
+
},
|
| 41405 |
+
{
|
| 41406 |
+
"epoch": 5.913507287023589,
|
| 41407 |
+
"grad_norm": 1.7675957679748535,
|
| 41408 |
+
"learning_rate": 2.922659788972129e-06,
|
| 41409 |
+
"loss": 0.3735,
|
| 41410 |
+
"step": 59040
|
| 41411 |
+
},
|
| 41412 |
+
{
|
| 41413 |
+
"epoch": 5.914508939750588,
|
| 41414 |
+
"grad_norm": 2.1757771968841553,
|
| 41415 |
+
"learning_rate": 2.9173816578384056e-06,
|
| 41416 |
+
"loss": 0.391,
|
| 41417 |
+
"step": 59050
|
| 41418 |
+
},
|
| 41419 |
+
{
|
| 41420 |
+
"epoch": 5.915510592477588,
|
| 41421 |
+
"grad_norm": 2.069638967514038,
|
| 41422 |
+
"learning_rate": 2.912108001660993e-06,
|
| 41423 |
+
"loss": 0.3786,
|
| 41424 |
+
"step": 59060
|
| 41425 |
+
},
|
| 41426 |
+
{
|
| 41427 |
+
"epoch": 5.916512245204587,
|
| 41428 |
+
"grad_norm": 1.9366977214813232,
|
| 41429 |
+
"learning_rate": 2.9068388215085784e-06,
|
| 41430 |
+
"loss": 0.4201,
|
| 41431 |
+
"step": 59070
|
| 41432 |
+
},
|
| 41433 |
+
{
|
| 41434 |
+
"epoch": 5.917513897931587,
|
| 41435 |
+
"grad_norm": 2.3125500679016113,
|
| 41436 |
+
"learning_rate": 2.9015741184489383e-06,
|
| 41437 |
+
"loss": 0.4319,
|
| 41438 |
+
"step": 59080
|
| 41439 |
+
},
|
| 41440 |
+
{
|
| 41441 |
+
"epoch": 5.9185155506585865,
|
| 41442 |
+
"grad_norm": 2.5883350372314453,
|
| 41443 |
+
"learning_rate": 2.8963138935489494e-06,
|
| 41444 |
+
"loss": 0.4211,
|
| 41445 |
+
"step": 59090
|
| 41446 |
+
},
|
| 41447 |
+
{
|
| 41448 |
+
"epoch": 5.9195172033855865,
|
| 41449 |
+
"grad_norm": 2.3464431762695312,
|
| 41450 |
+
"learning_rate": 2.891058147874573e-06,
|
| 41451 |
+
"loss": 0.4863,
|
| 41452 |
+
"step": 59100
|
| 41453 |
+
},
|
| 41454 |
+
{
|
| 41455 |
+
"epoch": 5.920518856112586,
|
| 41456 |
+
"grad_norm": 1.9064674377441406,
|
| 41457 |
+
"learning_rate": 2.885806882490866e-06,
|
| 41458 |
+
"loss": 0.4283,
|
| 41459 |
+
"step": 59110
|
| 41460 |
+
},
|
| 41461 |
+
{
|
| 41462 |
+
"epoch": 5.921520508839586,
|
| 41463 |
+
"grad_norm": 1.7782020568847656,
|
| 41464 |
+
"learning_rate": 2.8805600984619733e-06,
|
| 41465 |
+
"loss": 0.3567,
|
| 41466 |
+
"step": 59120
|
| 41467 |
+
},
|
| 41468 |
+
{
|
| 41469 |
+
"epoch": 5.922522161566585,
|
| 41470 |
+
"grad_norm": 2.7385454177856445,
|
| 41471 |
+
"learning_rate": 2.87531779685114e-06,
|
| 41472 |
+
"loss": 0.4569,
|
| 41473 |
+
"step": 59130
|
| 41474 |
+
},
|
| 41475 |
+
{
|
| 41476 |
+
"epoch": 5.923523814293585,
|
| 41477 |
+
"grad_norm": 1.8192131519317627,
|
| 41478 |
+
"learning_rate": 2.8700799787206966e-06,
|
| 41479 |
+
"loss": 0.3606,
|
| 41480 |
+
"step": 59140
|
| 41481 |
+
},
|
| 41482 |
+
{
|
| 41483 |
+
"epoch": 5.924525467020584,
|
| 41484 |
+
"grad_norm": 2.351938486099243,
|
| 41485 |
+
"learning_rate": 2.864846645132066e-06,
|
| 41486 |
+
"loss": 0.3906,
|
| 41487 |
+
"step": 59150
|
| 41488 |
+
},
|
| 41489 |
+
{
|
| 41490 |
+
"epoch": 5.925527119747583,
|
| 41491 |
+
"grad_norm": 2.4750707149505615,
|
| 41492 |
+
"learning_rate": 2.859617797145761e-06,
|
| 41493 |
+
"loss": 0.3825,
|
| 41494 |
+
"step": 59160
|
| 41495 |
+
},
|
| 41496 |
+
{
|
| 41497 |
+
"epoch": 5.926528772474583,
|
| 41498 |
+
"grad_norm": 2.4524879455566406,
|
| 41499 |
+
"learning_rate": 2.8543934358213926e-06,
|
| 41500 |
+
"loss": 0.4104,
|
| 41501 |
+
"step": 59170
|
| 41502 |
+
},
|
| 41503 |
+
{
|
| 41504 |
+
"epoch": 5.927530425201582,
|
| 41505 |
+
"grad_norm": 2.594944953918457,
|
| 41506 |
+
"learning_rate": 2.8491735622176496e-06,
|
| 41507 |
+
"loss": 0.4243,
|
| 41508 |
+
"step": 59180
|
| 41509 |
+
},
|
| 41510 |
+
{
|
| 41511 |
+
"epoch": 5.928532077928582,
|
| 41512 |
+
"grad_norm": 2.297057867050171,
|
| 41513 |
+
"learning_rate": 2.8439581773923253e-06,
|
| 41514 |
+
"loss": 0.367,
|
| 41515 |
+
"step": 59190
|
| 41516 |
+
},
|
| 41517 |
+
{
|
| 41518 |
+
"epoch": 5.9295337306555815,
|
| 41519 |
+
"grad_norm": 2.190986156463623,
|
| 41520 |
+
"learning_rate": 2.838747282402293e-06,
|
| 41521 |
+
"loss": 0.3989,
|
| 41522 |
+
"step": 59200
|
| 41523 |
+
},
|
| 41524 |
+
{
|
| 41525 |
+
"epoch": 5.930535383382582,
|
| 41526 |
+
"grad_norm": 2.1682560443878174,
|
| 41527 |
+
"learning_rate": 2.8335408783035227e-06,
|
| 41528 |
+
"loss": 0.4141,
|
| 41529 |
+
"step": 59210
|
| 41530 |
+
},
|
| 41531 |
+
{
|
| 41532 |
+
"epoch": 5.931537036109581,
|
| 41533 |
+
"grad_norm": 2.400972366333008,
|
| 41534 |
+
"learning_rate": 2.8283389661510728e-06,
|
| 41535 |
+
"loss": 0.4541,
|
| 41536 |
+
"step": 59220
|
| 41537 |
+
},
|
| 41538 |
+
{
|
| 41539 |
+
"epoch": 5.93253868883658,
|
| 41540 |
+
"grad_norm": 2.003436326980591,
|
| 41541 |
+
"learning_rate": 2.8231415469990862e-06,
|
| 41542 |
+
"loss": 0.3708,
|
| 41543 |
+
"step": 59230
|
| 41544 |
+
},
|
| 41545 |
+
{
|
| 41546 |
+
"epoch": 5.93354034156358,
|
| 41547 |
+
"grad_norm": 2.1111843585968018,
|
| 41548 |
+
"learning_rate": 2.817948621900798e-06,
|
| 41549 |
+
"loss": 0.356,
|
| 41550 |
+
"step": 59240
|
| 41551 |
+
},
|
| 41552 |
+
{
|
| 41553 |
+
"epoch": 5.93454199429058,
|
| 41554 |
+
"grad_norm": 1.9980701208114624,
|
| 41555 |
+
"learning_rate": 2.8127601919085505e-06,
|
| 41556 |
+
"loss": 0.4125,
|
| 41557 |
+
"step": 59250
|
| 41558 |
+
},
|
| 41559 |
+
{
|
| 41560 |
+
"epoch": 5.935543647017579,
|
| 41561 |
+
"grad_norm": 2.2937803268432617,
|
| 41562 |
+
"learning_rate": 2.8075762580737407e-06,
|
| 41563 |
+
"loss": 0.3955,
|
| 41564 |
+
"step": 59260
|
| 41565 |
+
},
|
| 41566 |
+
{
|
| 41567 |
+
"epoch": 5.936545299744578,
|
| 41568 |
+
"grad_norm": 2.0709946155548096,
|
| 41569 |
+
"learning_rate": 2.802396821446876e-06,
|
| 41570 |
+
"loss": 0.3977,
|
| 41571 |
+
"step": 59270
|
| 41572 |
+
},
|
| 41573 |
+
{
|
| 41574 |
+
"epoch": 5.937546952471578,
|
| 41575 |
+
"grad_norm": 2.058303117752075,
|
| 41576 |
+
"learning_rate": 2.7972218830775526e-06,
|
| 41577 |
+
"loss": 0.4004,
|
| 41578 |
+
"step": 59280
|
| 41579 |
+
},
|
| 41580 |
+
{
|
| 41581 |
+
"epoch": 5.938548605198577,
|
| 41582 |
+
"grad_norm": 2.3546252250671387,
|
| 41583 |
+
"learning_rate": 2.792051444014451e-06,
|
| 41584 |
+
"loss": 0.4275,
|
| 41585 |
+
"step": 59290
|
| 41586 |
+
},
|
| 41587 |
+
{
|
| 41588 |
+
"epoch": 5.9395502579255774,
|
| 41589 |
+
"grad_norm": 1.9249030351638794,
|
| 41590 |
+
"learning_rate": 2.7868855053053395e-06,
|
| 41591 |
+
"loss": 0.3347,
|
| 41592 |
+
"step": 59300
|
| 41593 |
+
},
|
| 41594 |
+
{
|
| 41595 |
+
"epoch": 5.940551910652577,
|
| 41596 |
+
"grad_norm": 2.6790075302124023,
|
| 41597 |
+
"learning_rate": 2.7817240679970714e-06,
|
| 41598 |
+
"loss": 0.4179,
|
| 41599 |
+
"step": 59310
|
| 41600 |
+
},
|
| 41601 |
+
{
|
| 41602 |
+
"epoch": 5.941553563379577,
|
| 41603 |
+
"grad_norm": 2.345093250274658,
|
| 41604 |
+
"learning_rate": 2.776567133135605e-06,
|
| 41605 |
+
"loss": 0.3902,
|
| 41606 |
+
"step": 59320
|
| 41607 |
+
},
|
| 41608 |
+
{
|
| 41609 |
+
"epoch": 5.942555216106576,
|
| 41610 |
+
"grad_norm": 2.184208393096924,
|
| 41611 |
+
"learning_rate": 2.771414701765962e-06,
|
| 41612 |
+
"loss": 0.4259,
|
| 41613 |
+
"step": 59330
|
| 41614 |
+
},
|
| 41615 |
+
{
|
| 41616 |
+
"epoch": 5.943556868833575,
|
| 41617 |
+
"grad_norm": 2.410226583480835,
|
| 41618 |
+
"learning_rate": 2.7662667749322606e-06,
|
| 41619 |
+
"loss": 0.4249,
|
| 41620 |
+
"step": 59340
|
| 41621 |
+
},
|
| 41622 |
+
{
|
| 41623 |
+
"epoch": 5.944558521560575,
|
| 41624 |
+
"grad_norm": 2.012917995452881,
|
| 41625 |
+
"learning_rate": 2.761123353677708e-06,
|
| 41626 |
+
"loss": 0.3801,
|
| 41627 |
+
"step": 59350
|
| 41628 |
+
},
|
| 41629 |
+
{
|
| 41630 |
+
"epoch": 5.945560174287575,
|
| 41631 |
+
"grad_norm": 2.3022515773773193,
|
| 41632 |
+
"learning_rate": 2.755984439044615e-06,
|
| 41633 |
+
"loss": 0.4264,
|
| 41634 |
+
"step": 59360
|
| 41635 |
+
},
|
| 41636 |
+
{
|
| 41637 |
+
"epoch": 5.946561827014574,
|
| 41638 |
+
"grad_norm": 2.5932109355926514,
|
| 41639 |
+
"learning_rate": 2.7508500320743426e-06,
|
| 41640 |
+
"loss": 0.4608,
|
| 41641 |
+
"step": 59370
|
| 41642 |
+
},
|
| 41643 |
+
{
|
| 41644 |
+
"epoch": 5.947563479741573,
|
| 41645 |
+
"grad_norm": 2.0590479373931885,
|
| 41646 |
+
"learning_rate": 2.745720133807361e-06,
|
| 41647 |
+
"loss": 0.4668,
|
| 41648 |
+
"step": 59380
|
| 41649 |
+
},
|
| 41650 |
+
{
|
| 41651 |
+
"epoch": 5.948565132468573,
|
| 41652 |
+
"grad_norm": 2.1030056476593018,
|
| 41653 |
+
"learning_rate": 2.7405947452832397e-06,
|
| 41654 |
+
"loss": 0.4404,
|
| 41655 |
+
"step": 59390
|
| 41656 |
+
},
|
| 41657 |
+
{
|
| 41658 |
+
"epoch": 5.9495667851955725,
|
| 41659 |
+
"grad_norm": 1.6264331340789795,
|
| 41660 |
+
"learning_rate": 2.7354738675406028e-06,
|
| 41661 |
+
"loss": 0.3538,
|
| 41662 |
+
"step": 59400
|
| 41663 |
+
},
|
| 41664 |
+
{
|
| 41665 |
+
"epoch": 5.9505684379225725,
|
| 41666 |
+
"grad_norm": 2.1686582565307617,
|
| 41667 |
+
"learning_rate": 2.7303575016171813e-06,
|
| 41668 |
+
"loss": 0.426,
|
| 41669 |
+
"step": 59410
|
| 41670 |
+
},
|
| 41671 |
+
{
|
| 41672 |
+
"epoch": 5.951570090649572,
|
| 41673 |
+
"grad_norm": 1.9982020854949951,
|
| 41674 |
+
"learning_rate": 2.725245648549779e-06,
|
| 41675 |
+
"loss": 0.4315,
|
| 41676 |
+
"step": 59420
|
| 41677 |
+
},
|
| 41678 |
+
{
|
| 41679 |
+
"epoch": 5.952571743376572,
|
| 41680 |
+
"grad_norm": 2.0062196254730225,
|
| 41681 |
+
"learning_rate": 2.7201383093743123e-06,
|
| 41682 |
+
"loss": 0.401,
|
| 41683 |
+
"step": 59430
|
| 41684 |
+
},
|
| 41685 |
+
{
|
| 41686 |
+
"epoch": 5.953573396103571,
|
| 41687 |
+
"grad_norm": 2.404799461364746,
|
| 41688 |
+
"learning_rate": 2.715035485125747e-06,
|
| 41689 |
+
"loss": 0.4355,
|
| 41690 |
+
"step": 59440
|
| 41691 |
+
},
|
| 41692 |
+
{
|
| 41693 |
+
"epoch": 5.95457504883057,
|
| 41694 |
+
"grad_norm": 2.1522059440612793,
|
| 41695 |
+
"learning_rate": 2.709937176838151e-06,
|
| 41696 |
+
"loss": 0.4299,
|
| 41697 |
+
"step": 59450
|
| 41698 |
+
},
|
| 41699 |
+
{
|
| 41700 |
+
"epoch": 5.95557670155757,
|
| 41701 |
+
"grad_norm": 2.9422500133514404,
|
| 41702 |
+
"learning_rate": 2.7048433855446893e-06,
|
| 41703 |
+
"loss": 0.4022,
|
| 41704 |
+
"step": 59460
|
| 41705 |
+
},
|
| 41706 |
+
{
|
| 41707 |
+
"epoch": 5.956578354284569,
|
| 41708 |
+
"grad_norm": 1.841374158859253,
|
| 41709 |
+
"learning_rate": 2.699754112277586e-06,
|
| 41710 |
+
"loss": 0.3839,
|
| 41711 |
+
"step": 59470
|
| 41712 |
+
},
|
| 41713 |
+
{
|
| 41714 |
+
"epoch": 5.957580007011569,
|
| 41715 |
+
"grad_norm": 2.179323196411133,
|
| 41716 |
+
"learning_rate": 2.694669358068161e-06,
|
| 41717 |
+
"loss": 0.4221,
|
| 41718 |
+
"step": 59480
|
| 41719 |
+
},
|
| 41720 |
+
{
|
| 41721 |
+
"epoch": 5.958581659738568,
|
| 41722 |
+
"grad_norm": 2.291766881942749,
|
| 41723 |
+
"learning_rate": 2.6895891239468313e-06,
|
| 41724 |
+
"loss": 0.4304,
|
| 41725 |
+
"step": 59490
|
| 41726 |
+
},
|
| 41727 |
+
{
|
| 41728 |
+
"epoch": 5.959583312465568,
|
| 41729 |
+
"grad_norm": 2.427757978439331,
|
| 41730 |
+
"learning_rate": 2.684513410943085e-06,
|
| 41731 |
+
"loss": 0.4321,
|
| 41732 |
+
"step": 59500
|
| 41733 |
+
},
|
| 41734 |
+
{
|
| 41735 |
+
"epoch": 5.9605849651925675,
|
| 41736 |
+
"grad_norm": 2.763577938079834,
|
| 41737 |
+
"learning_rate": 2.679442220085487e-06,
|
| 41738 |
+
"loss": 0.3849,
|
| 41739 |
+
"step": 59510
|
| 41740 |
+
},
|
| 41741 |
+
{
|
| 41742 |
+
"epoch": 5.961586617919568,
|
| 41743 |
+
"grad_norm": 2.1000256538391113,
|
| 41744 |
+
"learning_rate": 2.6743755524016952e-06,
|
| 41745 |
+
"loss": 0.4208,
|
| 41746 |
+
"step": 59520
|
| 41747 |
+
},
|
| 41748 |
+
{
|
| 41749 |
+
"epoch": 5.962588270646567,
|
| 41750 |
+
"grad_norm": 2.173295736312866,
|
| 41751 |
+
"learning_rate": 2.6693134089184556e-06,
|
| 41752 |
+
"loss": 0.3923,
|
| 41753 |
+
"step": 59530
|
| 41754 |
+
},
|
| 41755 |
+
{
|
| 41756 |
+
"epoch": 5.963589923373567,
|
| 41757 |
+
"grad_norm": 3.0745134353637695,
|
| 41758 |
+
"learning_rate": 2.6642557906615962e-06,
|
| 41759 |
+
"loss": 0.3629,
|
| 41760 |
+
"step": 59540
|
| 41761 |
+
},
|
| 41762 |
+
{
|
| 41763 |
+
"epoch": 5.964591576100566,
|
| 41764 |
+
"grad_norm": 2.7793359756469727,
|
| 41765 |
+
"learning_rate": 2.6592026986560077e-06,
|
| 41766 |
+
"loss": 0.4423,
|
| 41767 |
+
"step": 59550
|
| 41768 |
+
},
|
| 41769 |
+
{
|
| 41770 |
+
"epoch": 5.965593228827565,
|
| 41771 |
+
"grad_norm": 2.4544596672058105,
|
| 41772 |
+
"learning_rate": 2.6541541339256904e-06,
|
| 41773 |
+
"loss": 0.385,
|
| 41774 |
+
"step": 59560
|
| 41775 |
+
},
|
| 41776 |
+
{
|
| 41777 |
+
"epoch": 5.966594881554565,
|
| 41778 |
+
"grad_norm": 2.3058173656463623,
|
| 41779 |
+
"learning_rate": 2.6491100974937235e-06,
|
| 41780 |
+
"loss": 0.4043,
|
| 41781 |
+
"step": 59570
|
| 41782 |
+
},
|
| 41783 |
+
{
|
| 41784 |
+
"epoch": 5.967596534281564,
|
| 41785 |
+
"grad_norm": 2.2369930744171143,
|
| 41786 |
+
"learning_rate": 2.6440705903822444e-06,
|
| 41787 |
+
"loss": 0.4034,
|
| 41788 |
+
"step": 59580
|
| 41789 |
+
},
|
| 41790 |
+
{
|
| 41791 |
+
"epoch": 5.968598187008564,
|
| 41792 |
+
"grad_norm": 2.3988139629364014,
|
| 41793 |
+
"learning_rate": 2.639035613612495e-06,
|
| 41794 |
+
"loss": 0.4246,
|
| 41795 |
+
"step": 59590
|
| 41796 |
+
},
|
| 41797 |
+
{
|
| 41798 |
+
"epoch": 5.969599839735563,
|
| 41799 |
+
"grad_norm": 1.8981486558914185,
|
| 41800 |
+
"learning_rate": 2.634005168204798e-06,
|
| 41801 |
+
"loss": 0.4158,
|
| 41802 |
+
"step": 59600
|
| 41803 |
+
},
|
| 41804 |
+
{
|
| 41805 |
+
"epoch": 5.970601492462563,
|
| 41806 |
+
"grad_norm": 2.494128465652466,
|
| 41807 |
+
"learning_rate": 2.6289792551785596e-06,
|
| 41808 |
+
"loss": 0.3885,
|
| 41809 |
+
"step": 59610
|
| 41810 |
+
},
|
| 41811 |
+
{
|
| 41812 |
+
"epoch": 5.971603145189563,
|
| 41813 |
+
"grad_norm": 1.9671915769577026,
|
| 41814 |
+
"learning_rate": 2.623957875552241e-06,
|
| 41815 |
+
"loss": 0.3754,
|
| 41816 |
+
"step": 59620
|
| 41817 |
+
},
|
| 41818 |
+
{
|
| 41819 |
+
"epoch": 5.972604797916563,
|
| 41820 |
+
"grad_norm": 2.8741295337677,
|
| 41821 |
+
"learning_rate": 2.618941030343425e-06,
|
| 41822 |
+
"loss": 0.4058,
|
| 41823 |
+
"step": 59630
|
| 41824 |
+
},
|
| 41825 |
+
{
|
| 41826 |
+
"epoch": 5.973606450643562,
|
| 41827 |
+
"grad_norm": 2.0449655055999756,
|
| 41828 |
+
"learning_rate": 2.613928720568745e-06,
|
| 41829 |
+
"loss": 0.3866,
|
| 41830 |
+
"step": 59640
|
| 41831 |
+
},
|
| 41832 |
+
{
|
| 41833 |
+
"epoch": 5.974608103370562,
|
| 41834 |
+
"grad_norm": 2.0897884368896484,
|
| 41835 |
+
"learning_rate": 2.608920947243937e-06,
|
| 41836 |
+
"loss": 0.4542,
|
| 41837 |
+
"step": 59650
|
| 41838 |
+
},
|
| 41839 |
+
{
|
| 41840 |
+
"epoch": 5.975609756097561,
|
| 41841 |
+
"grad_norm": 1.8200316429138184,
|
| 41842 |
+
"learning_rate": 2.6039177113837882e-06,
|
| 41843 |
+
"loss": 0.4314,
|
| 41844 |
+
"step": 59660
|
| 41845 |
+
},
|
| 41846 |
+
{
|
| 41847 |
+
"epoch": 5.97661140882456,
|
| 41848 |
+
"grad_norm": 2.550520420074463,
|
| 41849 |
+
"learning_rate": 2.598919014002202e-06,
|
| 41850 |
+
"loss": 0.4228,
|
| 41851 |
+
"step": 59670
|
| 41852 |
+
},
|
| 41853 |
+
{
|
| 41854 |
+
"epoch": 5.97761306155156,
|
| 41855 |
+
"grad_norm": 2.2803761959075928,
|
| 41856 |
+
"learning_rate": 2.5939248561121398e-06,
|
| 41857 |
+
"loss": 0.4327,
|
| 41858 |
+
"step": 59680
|
| 41859 |
+
},
|
| 41860 |
+
{
|
| 41861 |
+
"epoch": 5.978614714278559,
|
| 41862 |
+
"grad_norm": 2.195887804031372,
|
| 41863 |
+
"learning_rate": 2.58893523872564e-06,
|
| 41864 |
+
"loss": 0.4606,
|
| 41865 |
+
"step": 59690
|
| 41866 |
+
},
|
| 41867 |
+
{
|
| 41868 |
+
"epoch": 5.979616367005559,
|
| 41869 |
+
"grad_norm": 2.5205256938934326,
|
| 41870 |
+
"learning_rate": 2.5839501628538406e-06,
|
| 41871 |
+
"loss": 0.4165,
|
| 41872 |
+
"step": 59700
|
| 41873 |
+
},
|
| 41874 |
+
{
|
| 41875 |
+
"epoch": 5.9806180197325585,
|
| 41876 |
+
"grad_norm": 2.2707231044769287,
|
| 41877 |
+
"learning_rate": 2.5789696295069416e-06,
|
| 41878 |
+
"loss": 0.379,
|
| 41879 |
+
"step": 59710
|
| 41880 |
+
},
|
| 41881 |
+
{
|
| 41882 |
+
"epoch": 5.9816196724595585,
|
| 41883 |
+
"grad_norm": 2.755429983139038,
|
| 41884 |
+
"learning_rate": 2.573993639694236e-06,
|
| 41885 |
+
"loss": 0.3939,
|
| 41886 |
+
"step": 59720
|
| 41887 |
+
},
|
| 41888 |
+
{
|
| 41889 |
+
"epoch": 5.982621325186558,
|
| 41890 |
+
"grad_norm": 2.25152587890625,
|
| 41891 |
+
"learning_rate": 2.5690221944240727e-06,
|
| 41892 |
+
"loss": 0.4411,
|
| 41893 |
+
"step": 59730
|
| 41894 |
+
},
|
| 41895 |
+
{
|
| 41896 |
+
"epoch": 5.983622977913558,
|
| 41897 |
+
"grad_norm": 2.7713489532470703,
|
| 41898 |
+
"learning_rate": 2.5640552947039127e-06,
|
| 41899 |
+
"loss": 0.426,
|
| 41900 |
+
"step": 59740
|
| 41901 |
+
},
|
| 41902 |
+
{
|
| 41903 |
+
"epoch": 5.984624630640557,
|
| 41904 |
+
"grad_norm": 2.5069236755371094,
|
| 41905 |
+
"learning_rate": 2.5590929415402697e-06,
|
| 41906 |
+
"loss": 0.3881,
|
| 41907 |
+
"step": 59750
|
| 41908 |
+
},
|
| 41909 |
+
{
|
| 41910 |
+
"epoch": 5.985626283367557,
|
| 41911 |
+
"grad_norm": 1.884039044380188,
|
| 41912 |
+
"learning_rate": 2.5541351359387453e-06,
|
| 41913 |
+
"loss": 0.3725,
|
| 41914 |
+
"step": 59760
|
| 41915 |
+
},
|
| 41916 |
+
{
|
| 41917 |
+
"epoch": 5.986627936094556,
|
| 41918 |
+
"grad_norm": 2.1357924938201904,
|
| 41919 |
+
"learning_rate": 2.549181878904025e-06,
|
| 41920 |
+
"loss": 0.3889,
|
| 41921 |
+
"step": 59770
|
| 41922 |
+
},
|
| 41923 |
+
{
|
| 41924 |
+
"epoch": 5.987629588821555,
|
| 41925 |
+
"grad_norm": 2.1714611053466797,
|
| 41926 |
+
"learning_rate": 2.544233171439861e-06,
|
| 41927 |
+
"loss": 0.4213,
|
| 41928 |
+
"step": 59780
|
| 41929 |
+
},
|
| 41930 |
+
{
|
| 41931 |
+
"epoch": 5.988631241548555,
|
| 41932 |
+
"grad_norm": 2.4911916255950928,
|
| 41933 |
+
"learning_rate": 2.539289014549098e-06,
|
| 41934 |
+
"loss": 0.3941,
|
| 41935 |
+
"step": 59790
|
| 41936 |
+
},
|
| 41937 |
+
{
|
| 41938 |
+
"epoch": 5.989632894275554,
|
| 41939 |
+
"grad_norm": 1.662967324256897,
|
| 41940 |
+
"learning_rate": 2.534349409233633e-06,
|
| 41941 |
+
"loss": 0.3508,
|
| 41942 |
+
"step": 59800
|
| 41943 |
+
},
|
| 41944 |
+
{
|
| 41945 |
+
"epoch": 5.990634547002554,
|
| 41946 |
+
"grad_norm": 1.9502700567245483,
|
| 41947 |
+
"learning_rate": 2.529414356494475e-06,
|
| 41948 |
+
"loss": 0.3499,
|
| 41949 |
+
"step": 59810
|
| 41950 |
+
},
|
| 41951 |
+
{
|
| 41952 |
+
"epoch": 5.9916361997295535,
|
| 41953 |
+
"grad_norm": 2.755403995513916,
|
| 41954 |
+
"learning_rate": 2.5244838573316857e-06,
|
| 41955 |
+
"loss": 0.4458,
|
| 41956 |
+
"step": 59820
|
| 41957 |
+
},
|
| 41958 |
+
{
|
| 41959 |
+
"epoch": 5.992637852456554,
|
| 41960 |
+
"grad_norm": 2.3988425731658936,
|
| 41961 |
+
"learning_rate": 2.5195579127444134e-06,
|
| 41962 |
+
"loss": 0.4289,
|
| 41963 |
+
"step": 59830
|
| 41964 |
+
},
|
| 41965 |
+
{
|
| 41966 |
+
"epoch": 5.993639505183553,
|
| 41967 |
+
"grad_norm": 2.6327574253082275,
|
| 41968 |
+
"learning_rate": 2.5146365237308815e-06,
|
| 41969 |
+
"loss": 0.4813,
|
| 41970 |
+
"step": 59840
|
| 41971 |
+
},
|
| 41972 |
+
{
|
| 41973 |
+
"epoch": 5.994641157910553,
|
| 41974 |
+
"grad_norm": 2.2136104106903076,
|
| 41975 |
+
"learning_rate": 2.5097196912883896e-06,
|
| 41976 |
+
"loss": 0.3991,
|
| 41977 |
+
"step": 59850
|
| 41978 |
+
},
|
| 41979 |
+
{
|
| 41980 |
+
"epoch": 5.995642810637552,
|
| 41981 |
+
"grad_norm": 2.383437395095825,
|
| 41982 |
+
"learning_rate": 2.504807416413313e-06,
|
| 41983 |
+
"loss": 0.4518,
|
| 41984 |
+
"step": 59860
|
| 41985 |
+
},
|
| 41986 |
+
{
|
| 41987 |
+
"epoch": 5.996644463364552,
|
| 41988 |
+
"grad_norm": 2.0825629234313965,
|
| 41989 |
+
"learning_rate": 2.4998997001011075e-06,
|
| 41990 |
+
"loss": 0.3727,
|
| 41991 |
+
"step": 59870
|
| 41992 |
+
},
|
| 41993 |
+
{
|
| 41994 |
+
"epoch": 5.997646116091551,
|
| 41995 |
+
"grad_norm": 1.9307478666305542,
|
| 41996 |
+
"learning_rate": 2.4949965433463036e-06,
|
| 41997 |
+
"loss": 0.4497,
|
| 41998 |
+
"step": 59880
|
| 41999 |
+
},
|
| 42000 |
+
{
|
| 42001 |
+
"epoch": 5.99864776881855,
|
| 42002 |
+
"grad_norm": 2.087782859802246,
|
| 42003 |
+
"learning_rate": 2.490097947142503e-06,
|
| 42004 |
+
"loss": 0.4169,
|
| 42005 |
+
"step": 59890
|
| 42006 |
+
},
|
| 42007 |
+
{
|
| 42008 |
+
"epoch": 5.99964942154555,
|
| 42009 |
+
"grad_norm": 2.0872836112976074,
|
| 42010 |
+
"learning_rate": 2.4852039124823867e-06,
|
| 42011 |
+
"loss": 0.4388,
|
| 42012 |
+
"step": 59900
|
| 42013 |
+
},
|
| 42014 |
+
{
|
| 42015 |
+
"epoch": 6.000600991636199,
|
| 42016 |
+
"grad_norm": 2.266829013824463,
|
| 42017 |
+
"learning_rate": 2.4803144403577173e-06,
|
| 42018 |
+
"loss": 0.4137,
|
| 42019 |
+
"step": 59910
|
| 42020 |
+
},
|
| 42021 |
+
{
|
| 42022 |
+
"epoch": 6.001602644363199,
|
| 42023 |
+
"grad_norm": 2.115668535232544,
|
| 42024 |
+
"learning_rate": 2.475429531759321e-06,
|
| 42025 |
+
"loss": 0.3987,
|
| 42026 |
+
"step": 59920
|
| 42027 |
+
},
|
| 42028 |
+
{
|
| 42029 |
+
"epoch": 6.0026042970901985,
|
| 42030 |
+
"grad_norm": 1.9069344997406006,
|
| 42031 |
+
"learning_rate": 2.4705491876771123e-06,
|
| 42032 |
+
"loss": 0.3974,
|
| 42033 |
+
"step": 59930
|
| 42034 |
+
},
|
| 42035 |
+
{
|
| 42036 |
+
"epoch": 6.0036059498171985,
|
| 42037 |
+
"grad_norm": 2.177665948867798,
|
| 42038 |
+
"learning_rate": 2.4656734091000667e-06,
|
| 42039 |
+
"loss": 0.3814,
|
| 42040 |
+
"step": 59940
|
| 42041 |
+
},
|
| 42042 |
+
{
|
| 42043 |
+
"epoch": 6.004607602544198,
|
| 42044 |
+
"grad_norm": 2.3072803020477295,
|
| 42045 |
+
"learning_rate": 2.460802197016246e-06,
|
| 42046 |
+
"loss": 0.3967,
|
| 42047 |
+
"step": 59950
|
| 42048 |
+
},
|
| 42049 |
+
{
|
| 42050 |
+
"epoch": 6.005609255271198,
|
| 42051 |
+
"grad_norm": 2.0694549083709717,
|
| 42052 |
+
"learning_rate": 2.45593555241278e-06,
|
| 42053 |
+
"loss": 0.4141,
|
| 42054 |
+
"step": 59960
|
| 42055 |
+
},
|
| 42056 |
+
{
|
| 42057 |
+
"epoch": 6.006610907998197,
|
| 42058 |
+
"grad_norm": 2.099717378616333,
|
| 42059 |
+
"learning_rate": 2.451073476275878e-06,
|
| 42060 |
+
"loss": 0.3583,
|
| 42061 |
+
"step": 59970
|
| 42062 |
+
},
|
| 42063 |
+
{
|
| 42064 |
+
"epoch": 6.007612560725197,
|
| 42065 |
+
"grad_norm": 2.319852828979492,
|
| 42066 |
+
"learning_rate": 2.4462159695908205e-06,
|
| 42067 |
+
"loss": 0.4224,
|
| 42068 |
+
"step": 59980
|
| 42069 |
+
},
|
| 42070 |
+
{
|
| 42071 |
+
"epoch": 6.008614213452196,
|
| 42072 |
+
"grad_norm": 2.5351221561431885,
|
| 42073 |
+
"learning_rate": 2.441363033341956e-06,
|
| 42074 |
+
"loss": 0.4387,
|
| 42075 |
+
"step": 59990
|
| 42076 |
+
},
|
| 42077 |
+
{
|
| 42078 |
+
"epoch": 6.009615866179196,
|
| 42079 |
+
"grad_norm": 2.4294748306274414,
|
| 42080 |
+
"learning_rate": 2.436514668512721e-06,
|
| 42081 |
+
"loss": 0.4391,
|
| 42082 |
+
"step": 60000
|
| 42083 |
+
},
|
| 42084 |
+
{
|
| 42085 |
+
"epoch": 6.009615866179196,
|
| 42086 |
+
"eval_bleu": 0.4047969489431023,
|
| 42087 |
+
"eval_loss": 0.5045724511146545,
|
| 42088 |
+
"eval_rouge1": 0.7142618960035121,
|
| 42089 |
+
"eval_rouge2": 0.5468159528123441,
|
| 42090 |
+
"eval_rougeL": 0.6724157251031586,
|
| 42091 |
+
"eval_runtime": 77026.2325,
|
| 42092 |
+
"eval_samples_per_second": 0.23,
|
| 42093 |
+
"eval_steps_per_second": 0.029,
|
| 42094 |
+
"eval_wer": 0.6602967184122618,
|
| 42095 |
+
"step": 60000
|
| 42096 |
}
|
| 42097 |
],
|
| 42098 |
"logging_steps": 10,
|
|
|
|
| 42112 |
"attributes": {}
|
| 42113 |
}
|
| 42114 |
},
|
| 42115 |
+
"total_flos": 3.296250259911475e+21,
|
| 42116 |
"train_batch_size": 8,
|
| 42117 |
"trial_name": null,
|
| 42118 |
"trial_params": null
|