Training in progress, step 2500, checkpoint
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f593217a3c5e01370692eb71ba7f5ce25c543091bc2f8eeef0292c51c041cc23
 size 2329638768
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:749f387256de30ee51880e9ef2dfb2b1e6ceb1e622be17844a84dcf5eaa7e723
 size 4659454507
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2a2657373aa0f87b2c3a00df701f48aa9492e1ec5aac846047c549c358090975
 size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:be42dc90f8ee672f7a9de40f3d664002e5c546bf6cb806a995c84eb587b80525
 size 1465
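
The four binary files above are tracked with Git LFS, so this commit only rewrites their pointer files: a version line, an oid sha256 digest, and a byte size. (The pre-commit digests were lost in extraction and are left blank above.) Below is a minimal sketch of how a pointer can be verified against a locally downloaded blob; the path assumes the repository is checked out with its LFS objects pulled, and the expected values are taken from the new model.safetensors pointer:

import hashlib
from pathlib import Path

def sha256_of(path, chunk_size=1 << 20):
    # Stream in 1 MiB chunks so a multi-GB checkpoint never sits in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

blob = Path("last-checkpoint/model.safetensors")  # assumed local checkout
expected_oid = "f593217a3c5e01370692eb71ba7f5ce25c543091bc2f8eeef0292c51c041cc23"
expected_size = 2329638768

assert blob.stat().st_size == expected_size, "size mismatch"
assert sha256_of(blob) == expected_oid, "sha256 mismatch"
print("pointer matches blob")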
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
 "best_global_step": 2000,
 "best_metric": 0.0734301209449768,
 "best_model_checkpoint": "/content/working/mt5-model/checkpoint-2000",
-"epoch":
+"epoch": 4.9603174603174605,
 "eval_steps": 500,
-"global_step":
+"global_step": 2500,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
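
This hunk updates the two progress fields: the checkpoint now sits at global step 2500, epoch 2500/504 ≈ 4.96, while best_metric and best_model_checkpoint still point at step 2000, i.e. this checkpoint did not beat the best evaluation so far. A small sketch for reading these fields back out of a local copy of the file:

import json
from pathlib import Path

state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())

print(state["global_step"])             # 2500 after this commit
print(state["epoch"])                   # ~4.9603 (2500 steps / 504 steps per epoch)
print(state["best_metric"])             # 0.0734..., reached at best_global_step 2000
print(state["best_model_checkpoint"])   # /content/working/mt5-model/checkpoint-2000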
@@ -14054,7 +17562,7 @@
 "early_stopping_threshold": 0.0
 },
 "attributes": {
-"early_stopping_patience_counter":
 }
 },
 "TrainerControl": {
@@ -14068,7 +17576,7 @@
 "attributes": {}
 }
 },
-"total_flos":
 "train_batch_size": 32,
 "trial_name": null,
 "trial_params": null
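
The early_stopping_threshold and early_stopping_patience_counter fields are the serialized state of an EarlyStoppingCallback, and total_flos/train_batch_size come from the training arguments; the patience value itself is truncated out of this view. A hedged sketch of a configuration consistent with this state follows; the eval/save cadence of 500 steps, logging_steps=1, batch size 32, five epochs, and the output directory are read off the diff, and everything else (including the patience) is a guess:

from transformers import EarlyStoppingCallback, TrainingArguments

args = TrainingArguments(
    output_dir="/content/working/mt5-model",
    evaluation_strategy="steps",   # renamed eval_strategy in newer transformers releases
    eval_steps=500,
    save_steps=500,
    per_device_train_batch_size=32,
    logging_steps=1,
    num_train_epochs=5,            # epoch 4.96 at step 2500 implies 504 steps/epoch, 2520 total
    load_best_model_at_end=True,   # needed so best-checkpoint tracking and early stopping apply
)

early_stopping = EarlyStoppingCallback(
    early_stopping_patience=3,     # hypothetical; the real patience is not shown above
    early_stopping_threshold=0.0,  # matches the serialized state
)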
@@ -14040,6 +17562,3514 @@
 "eval_samples_per_second": 453.509,
 "eval_steps_per_second": 14.191,
 "step": 2000
-}
-],
-"logging_steps": 1,
+},
+{
+"epoch": 3.9702380952380953,
+"grad_norm": 506.9632873535156,
+"learning_rate": 1.2871287128712873e-05,
+"loss": 0.1617,
+"step": 2001
+},
+{
+"epoch": 3.9722222222222223,
+"grad_norm": 159.58740234375,
+"learning_rate": 1.2846534653465347e-05,
+"loss": 0.2799,
+"step": 2002
+},
+{
+"epoch": 3.9742063492063493,
+"grad_norm": 2582.400634765625,
+"learning_rate": 1.2821782178217823e-05,
+"loss": 0.1698,
+"step": 2003
+},
+{
+"epoch": 3.9761904761904763,
+"grad_norm": 378.0476989746094,
+"learning_rate": 1.2797029702970297e-05,
+"loss": 0.1592,
+"step": 2004
+},
+{
+"epoch": 3.9781746031746033,
+"grad_norm": 989.1727905273438,
+"learning_rate": 1.2772277227722773e-05,
+"loss": 0.1286,
+"step": 2005
+},
+{
+"epoch": 3.9801587301587302,
+"grad_norm": 1220.7086181640625,
+"learning_rate": 1.2747524752475248e-05,
+"loss": 0.1636,
+"step": 2006
+},
+{
+"epoch": 3.982142857142857,
+"grad_norm": 322.2386779785156,
+"learning_rate": 1.2722772277227724e-05,
+"loss": 0.1367,
+"step": 2007
+},
+{
+"epoch": 3.984126984126984,
+"grad_norm": 275.83648681640625,
+"learning_rate": 1.26980198019802e-05,
+"loss": 0.1393,
+"step": 2008
+},
+{
+"epoch": 3.986111111111111,
+"grad_norm": 37.701011657714844,
+"learning_rate": 1.2673267326732674e-05,
+"loss": 0.0873,
+"step": 2009
+},
+{
+"epoch": 3.988095238095238,
+"grad_norm": 243.19874572753906,
+"learning_rate": 1.264851485148515e-05,
+"loss": 0.1521,
+"step": 2010
+},
+{
+"epoch": 3.990079365079365,
+"grad_norm": 378.2672119140625,
+"learning_rate": 1.2623762376237624e-05,
+"loss": 0.1511,
+"step": 2011
+},
+{
+"epoch": 3.992063492063492,
+"grad_norm": 108.33940887451172,
+"learning_rate": 1.25990099009901e-05,
+"loss": 0.1268,
+"step": 2012
+},
+{
+"epoch": 3.994047619047619,
+"grad_norm": 2048.461181640625,
+"learning_rate": 1.2574257425742574e-05,
+"loss": 0.1494,
+"step": 2013
+},
+{
+"epoch": 3.996031746031746,
+"grad_norm": 129.11721801757812,
+"learning_rate": 1.254950495049505e-05,
+"loss": 0.1307,
+"step": 2014
+},
+{
+"epoch": 3.998015873015873,
+"grad_norm": 43.189186096191406,
+"learning_rate": 1.2524752475247525e-05,
+"loss": 0.0984,
+"step": 2015
+},
+{
+"epoch": 4.0,
+"grad_norm": 249.77349853515625,
+"learning_rate": 1.25e-05,
+"loss": 0.1744,
+"step": 2016
+},
+{
+"epoch": 4.001984126984127,
+"grad_norm": 16.607669830322266,
+"learning_rate": 1.2475247524752477e-05,
+"loss": 0.1375,
+"step": 2017
+},
+{
+"epoch": 4.003968253968254,
+"grad_norm": 186.6886444091797,
+"learning_rate": 1.245049504950495e-05,
+"loss": 0.1617,
+"step": 2018
+},
+{
+"epoch": 4.005952380952381,
+"grad_norm": 1185.2125244140625,
+"learning_rate": 1.2425742574257427e-05,
+"loss": 0.2715,
+"step": 2019
+},
+{
+"epoch": 4.007936507936508,
+"grad_norm": 170.1215057373047,
+"learning_rate": 1.2400990099009901e-05,
+"loss": 0.1497,
+"step": 2020
+},
+{
+"epoch": 4.009920634920635,
+"grad_norm": 618.3604125976562,
+"learning_rate": 1.2376237623762377e-05,
+"loss": 0.1393,
+"step": 2021
+},
+{
+"epoch": 4.011904761904762,
+"grad_norm": 150.28140258789062,
+"learning_rate": 1.2351485148514851e-05,
+"loss": 0.1247,
+"step": 2022
+},
+{
+"epoch": 4.013888888888889,
+"grad_norm": 325.0749206542969,
+"learning_rate": 1.2326732673267327e-05,
+"loss": 0.2959,
+"step": 2023
+},
+{
+"epoch": 4.015873015873016,
+"grad_norm": 130.74429321289062,
+"learning_rate": 1.2301980198019802e-05,
+"loss": 0.1268,
+"step": 2024
+},
+{
+"epoch": 4.017857142857143,
+"grad_norm": 43.91646194458008,
+"learning_rate": 1.2277227722772278e-05,
+"loss": 0.136,
+"step": 2025
+},
+{
+"epoch": 4.01984126984127,
+"grad_norm": 48.95199203491211,
+"learning_rate": 1.2252475247524754e-05,
+"loss": 0.1019,
+"step": 2026
+},
+{
+"epoch": 4.021825396825397,
+"grad_norm": 108.6751708984375,
+"learning_rate": 1.2227722772277228e-05,
+"loss": 0.1452,
+"step": 2027
+},
+{
+"epoch": 4.023809523809524,
+"grad_norm": 8.463884353637695,
+"learning_rate": 1.2202970297029704e-05,
+"loss": 0.0986,
+"step": 2028
+},
+{
+"epoch": 4.025793650793651,
+"grad_norm": 182.42562866210938,
+"learning_rate": 1.2178217821782178e-05,
+"loss": 0.1201,
+"step": 2029
+},
+{
+"epoch": 4.027777777777778,
+"grad_norm": 180.56643676757812,
+"learning_rate": 1.2153465346534654e-05,
+"loss": 0.1358,
+"step": 2030
+},
+{
+"epoch": 4.029761904761905,
+"grad_norm": 53.46614456176758,
+"learning_rate": 1.2128712871287128e-05,
+"loss": 0.209,
+"step": 2031
+},
+{
+"epoch": 4.031746031746032,
+"grad_norm": 243.12811279296875,
+"learning_rate": 1.2103960396039604e-05,
+"loss": 0.1119,
+"step": 2032
+},
+{
+"epoch": 4.033730158730159,
+"grad_norm": 390.42852783203125,
+"learning_rate": 1.207920792079208e-05,
+"loss": 0.1173,
+"step": 2033
+},
+{
+"epoch": 4.035714285714286,
+"grad_norm": 129.40390014648438,
+"learning_rate": 1.2054455445544555e-05,
+"loss": 0.1293,
+"step": 2034
+},
+{
+"epoch": 4.037698412698413,
+"grad_norm": 25.4575138092041,
+"learning_rate": 1.202970297029703e-05,
+"loss": 0.1045,
+"step": 2035
+},
+{
+"epoch": 4.0396825396825395,
+"grad_norm": 119.89496612548828,
+"learning_rate": 1.2004950495049505e-05,
+"loss": 0.1402,
+"step": 2036
+},
+{
+"epoch": 4.041666666666667,
+"grad_norm": 134.82168579101562,
+"learning_rate": 1.198019801980198e-05,
+"loss": 0.1018,
+"step": 2037
+},
+{
+"epoch": 4.0436507936507935,
+"grad_norm": 103.36309814453125,
+"learning_rate": 1.1955445544554455e-05,
+"loss": 0.1013,
+"step": 2038
+},
+{
+"epoch": 4.045634920634921,
+"grad_norm": 78.91583251953125,
+"learning_rate": 1.1930693069306931e-05,
+"loss": 0.1167,
+"step": 2039
+},
+{
+"epoch": 4.0476190476190474,
+"grad_norm": 240.01792907714844,
+"learning_rate": 1.1905940594059405e-05,
+"loss": 0.1434,
+"step": 2040
+},
+{
+"epoch": 4.049603174603175,
+"grad_norm": 832.37353515625,
+"learning_rate": 1.1881188118811881e-05,
+"loss": 0.1394,
+"step": 2041
+},
+{
+"epoch": 4.051587301587301,
+"grad_norm": 148.79266357421875,
+"learning_rate": 1.1856435643564357e-05,
+"loss": 0.1388,
+"step": 2042
+},
+{
+"epoch": 4.053571428571429,
+"grad_norm": 112.59849548339844,
+"learning_rate": 1.1831683168316831e-05,
+"loss": 0.129,
+"step": 2043
+},
+{
+"epoch": 4.055555555555555,
+"grad_norm": 138.16001892089844,
+"learning_rate": 1.1806930693069307e-05,
+"loss": 0.1655,
+"step": 2044
+},
+{
+"epoch": 4.057539682539683,
+"grad_norm": 217.28553771972656,
+"learning_rate": 1.1782178217821782e-05,
+"loss": 0.1595,
+"step": 2045
+},
+{
+"epoch": 4.059523809523809,
+"grad_norm": 70.22457122802734,
+"learning_rate": 1.1757425742574258e-05,
+"loss": 0.1389,
+"step": 2046
+},
+{
+"epoch": 4.061507936507937,
+"grad_norm": 214.07992553710938,
+"learning_rate": 1.1732673267326732e-05,
+"loss": 0.129,
+"step": 2047
+},
+{
+"epoch": 4.063492063492063,
+"grad_norm": 58.192779541015625,
+"learning_rate": 1.170792079207921e-05,
+"loss": 0.1247,
+"step": 2048
+},
+{
+"epoch": 4.065476190476191,
+"grad_norm": 76.61177825927734,
+"learning_rate": 1.1683168316831684e-05,
+"loss": 0.1567,
+"step": 2049
+},
+{
+"epoch": 4.067460317460317,
+"grad_norm": 321.9510498046875,
+"learning_rate": 1.165841584158416e-05,
+"loss": 0.163,
+"step": 2050
+},
+{
+"epoch": 4.069444444444445,
+"grad_norm": 118.4086685180664,
+"learning_rate": 1.1633663366336634e-05,
+"loss": 0.1282,
+"step": 2051
+},
+{
+"epoch": 4.071428571428571,
+"grad_norm": 308.5423889160156,
+"learning_rate": 1.160891089108911e-05,
+"loss": 0.1187,
+"step": 2052
+},
+{
+"epoch": 4.073412698412699,
+"grad_norm": 455.6436767578125,
+"learning_rate": 1.1584158415841586e-05,
+"loss": 0.1222,
+"step": 2053
+},
+{
+"epoch": 4.075396825396825,
+"grad_norm": 252.35995483398438,
+"learning_rate": 1.155940594059406e-05,
+"loss": 0.1114,
+"step": 2054
+},
+{
+"epoch": 4.0773809523809526,
+"grad_norm": 646.7742309570312,
+"learning_rate": 1.1534653465346536e-05,
+"loss": 0.1215,
+"step": 2055
+},
+{
+"epoch": 4.079365079365079,
+"grad_norm": 10727.0283203125,
+"learning_rate": 1.150990099009901e-05,
+"loss": 0.1345,
+"step": 2056
+},
+{
+"epoch": 4.0813492063492065,
+"grad_norm": 203.8192596435547,
+"learning_rate": 1.1485148514851487e-05,
+"loss": 0.1498,
+"step": 2057
+},
+{
+"epoch": 4.083333333333333,
+"grad_norm": 63.352386474609375,
+"learning_rate": 1.1460396039603961e-05,
+"loss": 0.1414,
+"step": 2058
+},
+{
+"epoch": 4.0853174603174605,
+"grad_norm": 162.06802368164062,
+"learning_rate": 1.1435643564356437e-05,
+"loss": 0.1618,
+"step": 2059
+},
+{
+"epoch": 4.087301587301587,
+"grad_norm": 213.84620666503906,
+"learning_rate": 1.1410891089108911e-05,
+"loss": 0.1719,
+"step": 2060
+},
+{
+"epoch": 4.089285714285714,
+"grad_norm": 74.56119537353516,
+"learning_rate": 1.1386138613861387e-05,
+"loss": 0.1892,
+"step": 2061
+},
+{
+"epoch": 4.091269841269841,
+"grad_norm": 265.3496398925781,
+"learning_rate": 1.1361386138613863e-05,
+"loss": 0.1524,
+"step": 2062
+},
+{
+"epoch": 4.093253968253968,
+"grad_norm": 3035.932373046875,
+"learning_rate": 1.1336633663366337e-05,
+"loss": 0.1743,
+"step": 2063
+},
+{
+"epoch": 4.095238095238095,
+"grad_norm": 236.17384338378906,
+"learning_rate": 1.1311881188118813e-05,
+"loss": 0.1092,
+"step": 2064
+},
+{
+"epoch": 4.097222222222222,
+"grad_norm": 170.4839630126953,
+"learning_rate": 1.1287128712871288e-05,
+"loss": 0.1557,
+"step": 2065
+},
+{
+"epoch": 4.099206349206349,
+"grad_norm": 157.25772094726562,
+"learning_rate": 1.1262376237623764e-05,
+"loss": 0.1446,
+"step": 2066
+},
+{
+"epoch": 4.101190476190476,
+"grad_norm": 755.6629638671875,
+"learning_rate": 1.1237623762376238e-05,
+"loss": 0.1718,
+"step": 2067
+},
+{
+"epoch": 4.103174603174603,
+"grad_norm": 183.63429260253906,
+"learning_rate": 1.1212871287128714e-05,
+"loss": 0.2143,
+"step": 2068
+},
+{
+"epoch": 4.10515873015873,
+"grad_norm": 160.2490997314453,
+"learning_rate": 1.1188118811881188e-05,
+"loss": 0.1333,
+"step": 2069
+},
+{
+"epoch": 4.107142857142857,
+"grad_norm": 62.349857330322266,
+"learning_rate": 1.1163366336633664e-05,
+"loss": 0.1164,
+"step": 2070
+},
+{
+"epoch": 4.109126984126984,
+"grad_norm": 1108.14306640625,
+"learning_rate": 1.113861386138614e-05,
+"loss": 0.1296,
+"step": 2071
+},
+{
+"epoch": 4.111111111111111,
+"grad_norm": 128.0206756591797,
+"learning_rate": 1.1113861386138614e-05,
+"loss": 0.1488,
+"step": 2072
+},
+{
+"epoch": 4.113095238095238,
+"grad_norm": 944.4619140625,
+"learning_rate": 1.108910891089109e-05,
+"loss": 0.1832,
+"step": 2073
+},
+{
+"epoch": 4.115079365079365,
+"grad_norm": 520.0220336914062,
+"learning_rate": 1.1064356435643565e-05,
+"loss": 0.1039,
+"step": 2074
+},
+{
+"epoch": 4.117063492063492,
+"grad_norm": 732.43017578125,
+"learning_rate": 1.103960396039604e-05,
+"loss": 0.1439,
+"step": 2075
+},
+{
+"epoch": 4.119047619047619,
+"grad_norm": 279.7555847167969,
+"learning_rate": 1.1014851485148515e-05,
+"loss": 0.1448,
+"step": 2076
+},
+{
+"epoch": 4.121031746031746,
+"grad_norm": 588.6355590820312,
+"learning_rate": 1.099009900990099e-05,
+"loss": 0.1537,
+"step": 2077
+},
+{
+"epoch": 4.123015873015873,
+"grad_norm": 174.8496551513672,
+"learning_rate": 1.0965346534653465e-05,
+"loss": 0.1366,
+"step": 2078
+},
+{
+"epoch": 4.125,
+"grad_norm": 790.6178588867188,
+"learning_rate": 1.0940594059405941e-05,
+"loss": 0.3031,
+"step": 2079
+},
+{
+"epoch": 4.1269841269841265,
+"grad_norm": 205.83494567871094,
+"learning_rate": 1.0915841584158417e-05,
+"loss": 0.1261,
+"step": 2080
+},
+{
+"epoch": 4.128968253968254,
+"grad_norm": 64.3187484741211,
+"learning_rate": 1.0891089108910891e-05,
+"loss": 0.1302,
+"step": 2081
+},
+{
+"epoch": 4.130952380952381,
+"grad_norm": 150.97560119628906,
+"learning_rate": 1.0866336633663367e-05,
+"loss": 0.1305,
+"step": 2082
+},
+{
+"epoch": 4.132936507936508,
+"grad_norm": 160.1085205078125,
+"learning_rate": 1.0841584158415842e-05,
+"loss": 0.1536,
+"step": 2083
+},
+{
+"epoch": 4.134920634920635,
+"grad_norm": 228.52468872070312,
+"learning_rate": 1.0816831683168317e-05,
+"loss": 0.0961,
+"step": 2084
+},
+{
+"epoch": 4.136904761904762,
+"grad_norm": 115.7950439453125,
+"learning_rate": 1.0792079207920792e-05,
+"loss": 0.1939,
+"step": 2085
+},
+{
+"epoch": 4.138888888888889,
+"grad_norm": 199.59735107421875,
+"learning_rate": 1.0767326732673268e-05,
+"loss": 0.1377,
+"step": 2086
+},
+{
+"epoch": 4.140873015873016,
+"grad_norm": 74.76663970947266,
+"learning_rate": 1.0742574257425744e-05,
+"loss": 0.1271,
+"step": 2087
+},
+{
+"epoch": 4.142857142857143,
+"grad_norm": 100.28704833984375,
+"learning_rate": 1.0717821782178218e-05,
+"loss": 0.1383,
+"step": 2088
+},
+{
+"epoch": 4.14484126984127,
+"grad_norm": 467.0574951171875,
+"learning_rate": 1.0693069306930694e-05,
+"loss": 0.1488,
+"step": 2089
+},
+{
+"epoch": 4.146825396825397,
+"grad_norm": 250.61990356445312,
+"learning_rate": 1.0668316831683168e-05,
+"loss": 0.1856,
+"step": 2090
+},
+{
+"epoch": 4.148809523809524,
+"grad_norm": 690.1161499023438,
+"learning_rate": 1.0643564356435644e-05,
+"loss": 0.2727,
+"step": 2091
+},
+{
+"epoch": 4.150793650793651,
+"grad_norm": 86.5639877319336,
+"learning_rate": 1.0618811881188118e-05,
+"loss": 0.1237,
+"step": 2092
+},
+{
+"epoch": 4.152777777777778,
+"grad_norm": 26.524070739746094,
+"learning_rate": 1.0594059405940594e-05,
+"loss": 0.1224,
+"step": 2093
+},
+{
+"epoch": 4.154761904761905,
+"grad_norm": 68.96733093261719,
+"learning_rate": 1.0569306930693069e-05,
+"loss": 0.1968,
+"step": 2094
+},
+{
+"epoch": 4.156746031746032,
+"grad_norm": 175.83175659179688,
+"learning_rate": 1.0544554455445545e-05,
+"loss": 0.1162,
+"step": 2095
+},
+{
+"epoch": 4.158730158730159,
+"grad_norm": 1525.2396240234375,
+"learning_rate": 1.051980198019802e-05,
+"loss": 0.1197,
+"step": 2096
+},
+{
+"epoch": 4.160714285714286,
+"grad_norm": 182.71054077148438,
+"learning_rate": 1.0495049504950495e-05,
+"loss": 0.4312,
+"step": 2097
+},
+{
+"epoch": 4.162698412698413,
+"grad_norm": 105.45257568359375,
+"learning_rate": 1.0470297029702971e-05,
+"loss": 0.1162,
+"step": 2098
+},
+{
+"epoch": 4.1646825396825395,
+"grad_norm": 146.93807983398438,
+"learning_rate": 1.0445544554455445e-05,
+"loss": 0.1148,
+"step": 2099
+},
+{
+"epoch": 4.166666666666667,
+"grad_norm": 408.9582214355469,
+"learning_rate": 1.0420792079207921e-05,
+"loss": 0.1235,
+"step": 2100
+},
+{
+"epoch": 4.1686507936507935,
+"grad_norm": 57.804710388183594,
+"learning_rate": 1.0396039603960395e-05,
+"loss": 0.1322,
+"step": 2101
+},
+{
+"epoch": 4.170634920634921,
+"grad_norm": 47.50266647338867,
+"learning_rate": 1.0371287128712871e-05,
+"loss": 0.1301,
+"step": 2102
+},
+{
+"epoch": 4.1726190476190474,
+"grad_norm": 176.2106170654297,
+"learning_rate": 1.0346534653465346e-05,
+"loss": 0.1167,
+"step": 2103
+},
+{
+"epoch": 4.174603174603175,
+"grad_norm": 522.767578125,
+"learning_rate": 1.0321782178217822e-05,
+"loss": 0.1477,
+"step": 2104
+},
+{
+"epoch": 4.176587301587301,
+"grad_norm": 1277.5574951171875,
+"learning_rate": 1.0297029702970298e-05,
+"loss": 0.1299,
+"step": 2105
+},
+{
+"epoch": 4.178571428571429,
+"grad_norm": 158.62039184570312,
+"learning_rate": 1.0272277227722772e-05,
+"loss": 0.1191,
+"step": 2106
+},
+{
+"epoch": 4.180555555555555,
+"grad_norm": 428.6009826660156,
+"learning_rate": 1.0247524752475248e-05,
+"loss": 0.1398,
+"step": 2107
+},
+{
+"epoch": 4.182539682539683,
+"grad_norm": 191.56063842773438,
+"learning_rate": 1.0222772277227722e-05,
+"loss": 0.1433,
+"step": 2108
+},
+{
+"epoch": 4.184523809523809,
+"grad_norm": 129.01991271972656,
+"learning_rate": 1.0198019801980198e-05,
+"loss": 0.1527,
+"step": 2109
+},
+{
+"epoch": 4.186507936507937,
+"grad_norm": 537.6927490234375,
+"learning_rate": 1.0173267326732672e-05,
+"loss": 0.3337,
+"step": 2110
+},
+{
+"epoch": 4.188492063492063,
+"grad_norm": 151.9865264892578,
+"learning_rate": 1.014851485148515e-05,
+"loss": 0.1131,
+"step": 2111
+},
+{
+"epoch": 4.190476190476191,
+"grad_norm": 124.66220092773438,
+"learning_rate": 1.0123762376237624e-05,
+"loss": 0.1267,
+"step": 2112
+},
+{
+"epoch": 4.192460317460317,
+"grad_norm": 251.38673400878906,
+"learning_rate": 1.00990099009901e-05,
+"loss": 0.1158,
+"step": 2113
+},
+{
+"epoch": 4.194444444444445,
+"grad_norm": 47.095367431640625,
+"learning_rate": 1.0074257425742575e-05,
+"loss": 0.1677,
+"step": 2114
+},
+{
+"epoch": 4.196428571428571,
+"grad_norm": 191.38722229003906,
+"learning_rate": 1.004950495049505e-05,
+"loss": 0.1077,
+"step": 2115
+},
+{
+"epoch": 4.198412698412699,
+"grad_norm": 609.5339965820312,
+"learning_rate": 1.0024752475247527e-05,
+"loss": 0.1609,
+"step": 2116
+},
+{
+"epoch": 4.200396825396825,
+"grad_norm": 193.63494873046875,
+"learning_rate": 1e-05,
+"loss": 0.1503,
+"step": 2117
+},
+{
+"epoch": 4.2023809523809526,
+"grad_norm": 422.1722717285156,
+"learning_rate": 9.975247524752477e-06,
+"loss": 0.143,
+"step": 2118
+},
+{
+"epoch": 4.204365079365079,
+"grad_norm": 403.1889953613281,
+"learning_rate": 9.950495049504951e-06,
+"loss": 0.1031,
+"step": 2119
+},
+{
+"epoch": 4.2063492063492065,
+"grad_norm": 1224.9788818359375,
+"learning_rate": 9.925742574257427e-06,
+"loss": 0.2763,
+"step": 2120
+},
+{
+"epoch": 4.208333333333333,
+"grad_norm": 105.5435791015625,
+"learning_rate": 9.900990099009901e-06,
+"loss": 0.1204,
+"step": 2121
+},
+{
+"epoch": 4.2103174603174605,
+"grad_norm": 59.124961853027344,
+"learning_rate": 9.876237623762377e-06,
+"loss": 0.095,
+"step": 2122
+},
+{
+"epoch": 4.212301587301587,
+"grad_norm": 115.74383544921875,
+"learning_rate": 9.851485148514852e-06,
+"loss": 0.0984,
+"step": 2123
+},
+{
+"epoch": 4.214285714285714,
+"grad_norm": 381.5096130371094,
+"learning_rate": 9.826732673267328e-06,
+"loss": 0.2643,
+"step": 2124
+},
+{
+"epoch": 4.216269841269841,
+"grad_norm": 194.9180450439453,
+"learning_rate": 9.801980198019804e-06,
+"loss": 0.183,
+"step": 2125
+},
+{
+"epoch": 4.218253968253968,
+"grad_norm": 122.01882934570312,
+"learning_rate": 9.777227722772278e-06,
+"loss": 0.1906,
+"step": 2126
+},
+{
+"epoch": 4.220238095238095,
+"grad_norm": 284.7005310058594,
+"learning_rate": 9.752475247524754e-06,
+"loss": 0.1322,
+"step": 2127
+},
+{
+"epoch": 4.222222222222222,
+"grad_norm": 304.650146484375,
+"learning_rate": 9.727722772277228e-06,
+"loss": 0.2038,
+"step": 2128
+},
+{
+"epoch": 4.224206349206349,
+"grad_norm": 296.45404052734375,
+"learning_rate": 9.702970297029704e-06,
+"loss": 0.1793,
+"step": 2129
+},
+{
+"epoch": 4.226190476190476,
+"grad_norm": 802.5886840820312,
+"learning_rate": 9.678217821782178e-06,
+"loss": 0.1953,
+"step": 2130
+},
+{
+"epoch": 4.228174603174603,
+"grad_norm": 47.33495330810547,
+"learning_rate": 9.653465346534654e-06,
+"loss": 0.134,
+"step": 2131
+},
+{
+"epoch": 4.23015873015873,
+"grad_norm": 522.0841064453125,
+"learning_rate": 9.62871287128713e-06,
+"loss": 0.1436,
+"step": 2132
+},
+{
+"epoch": 4.232142857142857,
+"grad_norm": 2914.453125,
+"learning_rate": 9.603960396039604e-06,
+"loss": 0.1336,
+"step": 2133
+},
+{
+"epoch": 4.234126984126984,
+"grad_norm": 639.9263916015625,
+"learning_rate": 9.57920792079208e-06,
+"loss": 0.1204,
+"step": 2134
+},
+{
+"epoch": 4.236111111111111,
+"grad_norm": 623.4915771484375,
+"learning_rate": 9.554455445544555e-06,
+"loss": 0.115,
+"step": 2135
+},
+{
+"epoch": 4.238095238095238,
+"grad_norm": 558.7979125976562,
+"learning_rate": 9.52970297029703e-06,
+"loss": 0.1503,
+"step": 2136
+},
+{
+"epoch": 4.240079365079365,
+"grad_norm": 164.68435668945312,
+"learning_rate": 9.504950495049505e-06,
+"loss": 0.1533,
+"step": 2137
+},
+{
+"epoch": 4.242063492063492,
+"grad_norm": 121.57302856445312,
+"learning_rate": 9.480198019801981e-06,
+"loss": 0.1273,
+"step": 2138
+},
+{
+"epoch": 4.244047619047619,
+"grad_norm": 294.53399658203125,
+"learning_rate": 9.455445544554455e-06,
+"loss": 0.162,
+"step": 2139
+},
+{
+"epoch": 4.246031746031746,
+"grad_norm": 716.14990234375,
+"learning_rate": 9.430693069306931e-06,
+"loss": 0.173,
+"step": 2140
+},
+{
+"epoch": 4.2480158730158735,
+"grad_norm": 103.08102416992188,
+"learning_rate": 9.405940594059407e-06,
+"loss": 0.2254,
+"step": 2141
+},
+{
+"epoch": 4.25,
+"grad_norm": 144.0801239013672,
+"learning_rate": 9.381188118811881e-06,
+"loss": 0.1789,
+"step": 2142
+},
+{
+"epoch": 4.2519841269841265,
+"grad_norm": 211.16624450683594,
+"learning_rate": 9.356435643564357e-06,
+"loss": 0.1259,
+"step": 2143
+},
+{
+"epoch": 4.253968253968254,
+"grad_norm": 92.37157440185547,
+"learning_rate": 9.331683168316832e-06,
+"loss": 0.3128,
+"step": 2144
+},
+{
+"epoch": 4.255952380952381,
+"grad_norm": 79.8806381225586,
+"learning_rate": 9.306930693069308e-06,
+"loss": 0.1178,
+"step": 2145
+},
+{
+"epoch": 4.257936507936508,
+"grad_norm": 72.54400634765625,
+"learning_rate": 9.282178217821782e-06,
+"loss": 0.1399,
+"step": 2146
+},
+{
+"epoch": 4.259920634920635,
+"grad_norm": 328.96826171875,
+"learning_rate": 9.257425742574258e-06,
+"loss": 0.1615,
+"step": 2147
+},
+{
+"epoch": 4.261904761904762,
+"grad_norm": 136.6790771484375,
+"learning_rate": 9.232673267326732e-06,
+"loss": 0.1344,
+"step": 2148
+},
+{
+"epoch": 4.263888888888889,
+"grad_norm": 35.00862503051758,
+"learning_rate": 9.207920792079208e-06,
+"loss": 0.0992,
+"step": 2149
+},
+{
+"epoch": 4.265873015873016,
+"grad_norm": 155.98524475097656,
+"learning_rate": 9.183168316831684e-06,
+"loss": 0.1148,
+"step": 2150
+},
+{
+"epoch": 4.267857142857143,
+"grad_norm": 156.29273986816406,
+"learning_rate": 9.158415841584158e-06,
+"loss": 0.1056,
+"step": 2151
+},
+{
+"epoch": 4.26984126984127,
+"grad_norm": 1358.5274658203125,
+"learning_rate": 9.133663366336634e-06,
+"loss": 0.2789,
+"step": 2152
+},
+{
+"epoch": 4.271825396825397,
+"grad_norm": 379.7466125488281,
+"learning_rate": 9.108910891089109e-06,
+"loss": 0.1186,
+"step": 2153
+},
+{
+"epoch": 4.273809523809524,
+"grad_norm": 164.13929748535156,
+"learning_rate": 9.084158415841585e-06,
+"loss": 0.1451,
+"step": 2154
+},
+{
+"epoch": 4.275793650793651,
+"grad_norm": 125.25098419189453,
+"learning_rate": 9.059405940594059e-06,
+"loss": 0.1399,
+"step": 2155
+},
+{
+"epoch": 4.277777777777778,
+"grad_norm": 358.83441162109375,
+"learning_rate": 9.034653465346535e-06,
+"loss": 0.1197,
+"step": 2156
+},
+{
+"epoch": 4.279761904761905,
+"grad_norm": 386.9868469238281,
+"learning_rate": 9.00990099009901e-06,
+"loss": 0.1734,
+"step": 2157
+},
|
| 15143 |
+
{
|
| 15144 |
+
"epoch": 4.281746031746032,
|
| 15145 |
+
"grad_norm": 255.5669403076172,
|
| 15146 |
+
"learning_rate": 8.985148514851485e-06,
|
| 15147 |
+
"loss": 0.1383,
|
| 15148 |
+
"step": 2158
|
| 15149 |
+
},
|
| 15150 |
+
{
|
| 15151 |
+
"epoch": 4.283730158730159,
|
| 15152 |
+
"grad_norm": 61.071327209472656,
|
| 15153 |
+
"learning_rate": 8.960396039603961e-06,
|
| 15154 |
+
"loss": 0.1334,
|
| 15155 |
+
"step": 2159
|
| 15156 |
+
},
|
| 15157 |
+
{
|
| 15158 |
+
"epoch": 4.285714285714286,
|
| 15159 |
+
"grad_norm": 176.0297393798828,
|
| 15160 |
+
"learning_rate": 8.935643564356435e-06,
|
| 15161 |
+
"loss": 0.1704,
|
| 15162 |
+
"step": 2160
|
| 15163 |
+
},
|
| 15164 |
+
{
|
| 15165 |
+
"epoch": 4.287698412698413,
|
| 15166 |
+
"grad_norm": 113.34686279296875,
|
| 15167 |
+
"learning_rate": 8.910891089108911e-06,
|
| 15168 |
+
"loss": 0.1431,
|
| 15169 |
+
"step": 2161
|
| 15170 |
+
},
|
| 15171 |
+
{
|
| 15172 |
+
"epoch": 4.2896825396825395,
|
| 15173 |
+
"grad_norm": 409.6690368652344,
|
| 15174 |
+
"learning_rate": 8.886138613861386e-06,
|
| 15175 |
+
"loss": 0.3735,
|
| 15176 |
+
"step": 2162
|
| 15177 |
+
},
|
| 15178 |
+
{
|
| 15179 |
+
"epoch": 4.291666666666667,
|
| 15180 |
+
"grad_norm": 299.3995666503906,
|
| 15181 |
+
"learning_rate": 8.861386138613862e-06,
|
| 15182 |
+
"loss": 0.1387,
|
| 15183 |
+
"step": 2163
|
| 15184 |
+
},
|
| 15185 |
+
{
|
| 15186 |
+
"epoch": 4.2936507936507935,
|
| 15187 |
+
"grad_norm": 68.01351165771484,
|
| 15188 |
+
"learning_rate": 8.836633663366336e-06,
|
| 15189 |
+
"loss": 0.1181,
|
| 15190 |
+
"step": 2164
|
| 15191 |
+
},
|
| 15192 |
+
{
|
| 15193 |
+
"epoch": 4.295634920634921,
|
| 15194 |
+
"grad_norm": 1020.0321655273438,
|
| 15195 |
+
"learning_rate": 8.811881188118812e-06,
|
| 15196 |
+
"loss": 0.1366,
|
| 15197 |
+
"step": 2165
|
| 15198 |
+
},
|
| 15199 |
+
{
|
| 15200 |
+
"epoch": 4.2976190476190474,
|
| 15201 |
+
"grad_norm": 411.4695739746094,
|
| 15202 |
+
"learning_rate": 8.787128712871288e-06,
|
| 15203 |
+
"loss": 0.099,
|
| 15204 |
+
"step": 2166
|
| 15205 |
+
},
|
| 15206 |
+
{
|
| 15207 |
+
"epoch": 4.299603174603175,
|
| 15208 |
+
"grad_norm": 817.6732788085938,
|
| 15209 |
+
"learning_rate": 8.762376237623762e-06,
|
| 15210 |
+
"loss": 0.1563,
|
| 15211 |
+
"step": 2167
|
| 15212 |
+
},
|
| 15213 |
+
{
|
| 15214 |
+
"epoch": 4.301587301587301,
|
| 15215 |
+
"grad_norm": 748.29052734375,
|
| 15216 |
+
"learning_rate": 8.737623762376238e-06,
|
| 15217 |
+
"loss": 0.1744,
|
| 15218 |
+
"step": 2168
|
| 15219 |
+
},
|
| 15220 |
+
{
|
| 15221 |
+
"epoch": 4.303571428571429,
|
| 15222 |
+
"grad_norm": 979.2228393554688,
|
| 15223 |
+
"learning_rate": 8.712871287128712e-06,
|
| 15224 |
+
"loss": 0.146,
|
| 15225 |
+
"step": 2169
|
| 15226 |
+
},
|
| 15227 |
+
{
|
| 15228 |
+
"epoch": 4.305555555555555,
|
| 15229 |
+
"grad_norm": 1130.86572265625,
|
| 15230 |
+
"learning_rate": 8.688118811881188e-06,
|
| 15231 |
+
"loss": 0.1403,
|
| 15232 |
+
"step": 2170
|
| 15233 |
+
},
|
| 15234 |
+
{
|
| 15235 |
+
"epoch": 4.307539682539683,
|
| 15236 |
+
"grad_norm": 1465.18603515625,
|
| 15237 |
+
"learning_rate": 8.663366336633663e-06,
|
| 15238 |
+
"loss": 0.1656,
|
| 15239 |
+
"step": 2171
|
| 15240 |
+
},
|
| 15241 |
+
{
|
| 15242 |
+
"epoch": 4.309523809523809,
|
| 15243 |
+
"grad_norm": 766.5324096679688,
|
| 15244 |
+
"learning_rate": 8.638613861386139e-06,
|
| 15245 |
+
"loss": 0.1288,
|
| 15246 |
+
"step": 2172
|
| 15247 |
+
},
|
| 15248 |
+
{
|
| 15249 |
+
"epoch": 4.311507936507937,
|
| 15250 |
+
"grad_norm": 98.52468872070312,
|
| 15251 |
+
"learning_rate": 8.613861386138613e-06,
|
| 15252 |
+
"loss": 0.1137,
|
| 15253 |
+
"step": 2173
|
| 15254 |
+
},
|
| 15255 |
+
{
|
| 15256 |
+
"epoch": 4.313492063492063,
|
| 15257 |
+
"grad_norm": 466.1573181152344,
|
| 15258 |
+
"learning_rate": 8.58910891089109e-06,
|
| 15259 |
+
"loss": 0.2095,
|
| 15260 |
+
"step": 2174
|
| 15261 |
+
},
|
| 15262 |
+
{
|
| 15263 |
+
"epoch": 4.315476190476191,
|
| 15264 |
+
"grad_norm": 256.92791748046875,
|
| 15265 |
+
"learning_rate": 8.564356435643565e-06,
|
| 15266 |
+
"loss": 0.1345,
|
| 15267 |
+
"step": 2175
|
| 15268 |
+
},
|
| 15269 |
+
{
|
| 15270 |
+
"epoch": 4.317460317460317,
|
| 15271 |
+
"grad_norm": 169.274169921875,
|
| 15272 |
+
"learning_rate": 8.53960396039604e-06,
|
| 15273 |
+
"loss": 0.1209,
|
| 15274 |
+
"step": 2176
|
| 15275 |
+
},
|
| 15276 |
+
{
|
| 15277 |
+
"epoch": 4.319444444444445,
|
| 15278 |
+
"grad_norm": 4275.5087890625,
|
| 15279 |
+
"learning_rate": 8.514851485148517e-06,
|
| 15280 |
+
"loss": 0.1044,
|
| 15281 |
+
"step": 2177
|
| 15282 |
+
},
|
| 15283 |
+
{
|
| 15284 |
+
"epoch": 4.321428571428571,
|
| 15285 |
+
"grad_norm": 173.221435546875,
|
| 15286 |
+
"learning_rate": 8.490099009900991e-06,
|
| 15287 |
+
"loss": 0.1059,
|
| 15288 |
+
"step": 2178
|
| 15289 |
+
},
|
| 15290 |
+
{
|
| 15291 |
+
"epoch": 4.323412698412699,
|
| 15292 |
+
"grad_norm": 657.0520629882812,
|
| 15293 |
+
"learning_rate": 8.465346534653467e-06,
|
| 15294 |
+
"loss": 0.1309,
|
| 15295 |
+
"step": 2179
|
| 15296 |
+
},
|
| 15297 |
+
{
|
| 15298 |
+
"epoch": 4.325396825396825,
|
| 15299 |
+
"grad_norm": 80.0780029296875,
|
| 15300 |
+
"learning_rate": 8.440594059405941e-06,
|
| 15301 |
+
"loss": 0.098,
|
| 15302 |
+
"step": 2180
|
| 15303 |
+
},
|
| 15304 |
+
{
|
| 15305 |
+
"epoch": 4.3273809523809526,
|
| 15306 |
+
"grad_norm": 38.57035446166992,
|
| 15307 |
+
"learning_rate": 8.415841584158417e-06,
|
| 15308 |
+
"loss": 0.131,
|
| 15309 |
+
"step": 2181
|
| 15310 |
+
},
|
| 15311 |
+
{
|
| 15312 |
+
"epoch": 4.329365079365079,
|
| 15313 |
+
"grad_norm": 335.8876037597656,
|
| 15314 |
+
"learning_rate": 8.391089108910891e-06,
|
| 15315 |
+
"loss": 0.1659,
|
| 15316 |
+
"step": 2182
|
| 15317 |
+
},
|
| 15318 |
+
{
|
| 15319 |
+
"epoch": 4.3313492063492065,
|
| 15320 |
+
"grad_norm": 47.9968147277832,
|
| 15321 |
+
"learning_rate": 8.366336633663367e-06,
|
| 15322 |
+
"loss": 0.1186,
|
| 15323 |
+
"step": 2183
|
| 15324 |
+
},
|
| 15325 |
+
{
|
| 15326 |
+
"epoch": 4.333333333333333,
|
| 15327 |
+
"grad_norm": 987.3822631835938,
|
| 15328 |
+
"learning_rate": 8.341584158415842e-06,
|
| 15329 |
+
"loss": 0.1215,
|
| 15330 |
+
"step": 2184
|
| 15331 |
+
},
|
| 15332 |
+
{
|
| 15333 |
+
"epoch": 4.3353174603174605,
|
| 15334 |
+
"grad_norm": 114.58280944824219,
|
| 15335 |
+
"learning_rate": 8.316831683168318e-06,
|
| 15336 |
+
"loss": 0.1137,
|
| 15337 |
+
"step": 2185
|
| 15338 |
+
},
|
| 15339 |
+
{
|
| 15340 |
+
"epoch": 4.337301587301587,
|
| 15341 |
+
"grad_norm": 218.43724060058594,
|
| 15342 |
+
"learning_rate": 8.292079207920794e-06,
|
| 15343 |
+
"loss": 0.1589,
|
| 15344 |
+
"step": 2186
|
| 15345 |
+
},
|
| 15346 |
+
{
|
| 15347 |
+
"epoch": 4.339285714285714,
|
| 15348 |
+
"grad_norm": 59.1539421081543,
|
| 15349 |
+
"learning_rate": 8.267326732673268e-06,
|
| 15350 |
+
"loss": 0.142,
|
| 15351 |
+
"step": 2187
|
| 15352 |
+
},
|
| 15353 |
+
{
|
| 15354 |
+
"epoch": 4.341269841269841,
|
| 15355 |
+
"grad_norm": 414.4439392089844,
|
| 15356 |
+
"learning_rate": 8.242574257425744e-06,
|
| 15357 |
+
"loss": 0.2128,
|
| 15358 |
+
"step": 2188
|
| 15359 |
+
},
|
| 15360 |
+
{
|
| 15361 |
+
"epoch": 4.343253968253968,
|
| 15362 |
+
"grad_norm": 199.37249755859375,
|
| 15363 |
+
"learning_rate": 8.217821782178218e-06,
|
| 15364 |
+
"loss": 0.1413,
|
| 15365 |
+
"step": 2189
|
| 15366 |
+
},
|
| 15367 |
+
{
|
| 15368 |
+
"epoch": 4.345238095238095,
|
| 15369 |
+
"grad_norm": 749.605712890625,
|
| 15370 |
+
"learning_rate": 8.193069306930694e-06,
|
| 15371 |
+
"loss": 0.1338,
|
| 15372 |
+
"step": 2190
|
| 15373 |
+
},
|
| 15374 |
+
{
|
| 15375 |
+
"epoch": 4.347222222222222,
|
| 15376 |
+
"grad_norm": 160.30572509765625,
|
| 15377 |
+
"learning_rate": 8.168316831683168e-06,
|
| 15378 |
+
"loss": 0.1285,
|
| 15379 |
+
"step": 2191
|
| 15380 |
+
},
|
| 15381 |
+
{
|
| 15382 |
+
"epoch": 4.349206349206349,
|
| 15383 |
+
"grad_norm": 122.00711822509766,
|
| 15384 |
+
"learning_rate": 8.143564356435644e-06,
|
| 15385 |
+
"loss": 0.1307,
|
| 15386 |
+
"step": 2192
|
| 15387 |
+
},
|
| 15388 |
+
{
|
| 15389 |
+
"epoch": 4.351190476190476,
|
| 15390 |
+
"grad_norm": 165.81448364257812,
|
| 15391 |
+
"learning_rate": 8.118811881188119e-06,
|
| 15392 |
+
"loss": 0.0981,
|
| 15393 |
+
"step": 2193
|
| 15394 |
+
},
|
| 15395 |
+
{
|
| 15396 |
+
"epoch": 4.353174603174603,
|
| 15397 |
+
"grad_norm": 91.54843139648438,
|
| 15398 |
+
"learning_rate": 8.094059405940595e-06,
|
| 15399 |
+
"loss": 0.1321,
|
| 15400 |
+
"step": 2194
|
| 15401 |
+
},
|
| 15402 |
+
{
|
| 15403 |
+
"epoch": 4.35515873015873,
|
| 15404 |
+
"grad_norm": 164.43441772460938,
|
| 15405 |
+
"learning_rate": 8.06930693069307e-06,
|
| 15406 |
+
"loss": 0.1127,
|
| 15407 |
+
"step": 2195
|
| 15408 |
+
},
|
| 15409 |
+
{
|
| 15410 |
+
"epoch": 4.357142857142857,
|
| 15411 |
+
"grad_norm": 61.55143356323242,
|
| 15412 |
+
"learning_rate": 8.044554455445545e-06,
|
| 15413 |
+
"loss": 0.1279,
|
| 15414 |
+
"step": 2196
|
| 15415 |
+
},
|
| 15416 |
+
{
|
| 15417 |
+
"epoch": 4.359126984126984,
|
| 15418 |
+
"grad_norm": 397.53155517578125,
|
| 15419 |
+
"learning_rate": 8.019801980198021e-06,
|
| 15420 |
+
"loss": 0.1793,
|
| 15421 |
+
"step": 2197
|
| 15422 |
+
},
|
| 15423 |
+
{
|
| 15424 |
+
"epoch": 4.361111111111111,
|
| 15425 |
+
"grad_norm": 41.617488861083984,
|
| 15426 |
+
"learning_rate": 7.995049504950495e-06,
|
| 15427 |
+
"loss": 0.1236,
|
| 15428 |
+
"step": 2198
|
| 15429 |
+
},
|
| 15430 |
+
{
|
| 15431 |
+
"epoch": 4.363095238095238,
|
| 15432 |
+
"grad_norm": 173.76890563964844,
|
| 15433 |
+
"learning_rate": 7.970297029702971e-06,
|
| 15434 |
+
"loss": 0.1194,
|
| 15435 |
+
"step": 2199
|
| 15436 |
+
},
|
| 15437 |
+
{
|
| 15438 |
+
"epoch": 4.365079365079365,
|
| 15439 |
+
"grad_norm": 241.28482055664062,
|
| 15440 |
+
"learning_rate": 7.945544554455445e-06,
|
| 15441 |
+
"loss": 0.1411,
|
| 15442 |
+
"step": 2200
|
| 15443 |
+
},
|
| 15444 |
+
{
|
| 15445 |
+
"epoch": 4.367063492063492,
|
| 15446 |
+
"grad_norm": 104.19290924072266,
|
| 15447 |
+
"learning_rate": 7.920792079207921e-06,
|
| 15448 |
+
"loss": 0.1044,
|
| 15449 |
+
"step": 2201
|
| 15450 |
+
},
|
| 15451 |
+
{
|
| 15452 |
+
"epoch": 4.369047619047619,
|
| 15453 |
+
"grad_norm": 125.27460479736328,
|
| 15454 |
+
"learning_rate": 7.896039603960396e-06,
|
| 15455 |
+
"loss": 0.1282,
|
| 15456 |
+
"step": 2202
|
| 15457 |
+
},
|
| 15458 |
+
{
|
| 15459 |
+
"epoch": 4.371031746031746,
|
| 15460 |
+
"grad_norm": 238.3298797607422,
|
| 15461 |
+
"learning_rate": 7.871287128712872e-06,
|
| 15462 |
+
"loss": 0.1379,
|
| 15463 |
+
"step": 2203
|
| 15464 |
+
},
|
| 15465 |
+
{
|
| 15466 |
+
"epoch": 4.3730158730158735,
|
| 15467 |
+
"grad_norm": 52.062538146972656,
|
| 15468 |
+
"learning_rate": 7.846534653465348e-06,
|
| 15469 |
+
"loss": 0.1256,
|
| 15470 |
+
"step": 2204
|
| 15471 |
+
},
|
| 15472 |
+
{
|
| 15473 |
+
"epoch": 4.375,
|
| 15474 |
+
"grad_norm": 32651.7890625,
|
| 15475 |
+
"learning_rate": 7.821782178217822e-06,
|
| 15476 |
+
"loss": 0.1404,
|
| 15477 |
+
"step": 2205
|
| 15478 |
+
},
|
| 15479 |
+
{
|
| 15480 |
+
"epoch": 4.3769841269841265,
|
| 15481 |
+
"grad_norm": 42.01081085205078,
|
| 15482 |
+
"learning_rate": 7.797029702970298e-06,
|
| 15483 |
+
"loss": 0.1107,
|
| 15484 |
+
"step": 2206
|
| 15485 |
+
},
|
| 15486 |
+
{
|
| 15487 |
+
"epoch": 4.378968253968254,
|
| 15488 |
+
"grad_norm": 139.5529327392578,
|
| 15489 |
+
"learning_rate": 7.772277227722772e-06,
|
| 15490 |
+
"loss": 0.1145,
|
| 15491 |
+
"step": 2207
|
| 15492 |
+
},
|
| 15493 |
+
{
|
| 15494 |
+
"epoch": 4.380952380952381,
|
| 15495 |
+
"grad_norm": 224.26959228515625,
|
| 15496 |
+
"learning_rate": 7.747524752475248e-06,
|
| 15497 |
+
"loss": 0.1214,
|
| 15498 |
+
"step": 2208
|
| 15499 |
+
},
|
| 15500 |
+
{
|
| 15501 |
+
"epoch": 4.382936507936508,
|
| 15502 |
+
"grad_norm": 29.624595642089844,
|
| 15503 |
+
"learning_rate": 7.722772277227722e-06,
|
| 15504 |
+
"loss": 0.185,
|
| 15505 |
+
"step": 2209
|
| 15506 |
+
},
|
| 15507 |
+
{
|
| 15508 |
+
"epoch": 4.384920634920635,
|
| 15509 |
+
"grad_norm": 166.69876098632812,
|
| 15510 |
+
"learning_rate": 7.698019801980198e-06,
|
| 15511 |
+
"loss": 0.126,
|
| 15512 |
+
"step": 2210
|
| 15513 |
+
},
|
| 15514 |
+
{
|
| 15515 |
+
"epoch": 4.386904761904762,
|
| 15516 |
+
"grad_norm": 114.56996154785156,
|
| 15517 |
+
"learning_rate": 7.673267326732674e-06,
|
| 15518 |
+
"loss": 0.1306,
|
| 15519 |
+
"step": 2211
|
| 15520 |
+
},
|
| 15521 |
+
{
|
| 15522 |
+
"epoch": 4.388888888888889,
|
| 15523 |
+
"grad_norm": 2407.4287109375,
|
| 15524 |
+
"learning_rate": 7.648514851485149e-06,
|
| 15525 |
+
"loss": 0.2485,
|
| 15526 |
+
"step": 2212
|
| 15527 |
+
},
|
| 15528 |
+
{
|
| 15529 |
+
"epoch": 4.390873015873016,
|
| 15530 |
+
"grad_norm": 193.4139862060547,
|
| 15531 |
+
"learning_rate": 7.623762376237624e-06,
|
| 15532 |
+
"loss": 0.1155,
|
| 15533 |
+
"step": 2213
|
| 15534 |
+
},
|
| 15535 |
+
{
|
| 15536 |
+
"epoch": 4.392857142857143,
|
| 15537 |
+
"grad_norm": 633.5051879882812,
|
| 15538 |
+
"learning_rate": 7.599009900990099e-06,
|
| 15539 |
+
"loss": 0.1179,
|
| 15540 |
+
"step": 2214
|
| 15541 |
+
},
|
| 15542 |
+
{
|
| 15543 |
+
"epoch": 4.39484126984127,
|
| 15544 |
+
"grad_norm": 276.4228820800781,
|
| 15545 |
+
"learning_rate": 7.574257425742575e-06,
|
| 15546 |
+
"loss": 0.1079,
|
| 15547 |
+
"step": 2215
|
| 15548 |
+
},
|
| 15549 |
+
{
|
| 15550 |
+
"epoch": 4.396825396825397,
|
| 15551 |
+
"grad_norm": 14440.5302734375,
|
| 15552 |
+
"learning_rate": 7.54950495049505e-06,
|
| 15553 |
+
"loss": 0.1953,
|
| 15554 |
+
"step": 2216
|
| 15555 |
+
},
|
| 15556 |
+
{
|
| 15557 |
+
"epoch": 4.398809523809524,
|
| 15558 |
+
"grad_norm": 1272.780029296875,
|
| 15559 |
+
"learning_rate": 7.524752475247525e-06,
|
| 15560 |
+
"loss": 0.1281,
|
| 15561 |
+
"step": 2217
|
| 15562 |
+
},
|
| 15563 |
+
{
|
| 15564 |
+
"epoch": 4.400793650793651,
|
| 15565 |
+
"grad_norm": 568.2249755859375,
|
| 15566 |
+
"learning_rate": 7.5e-06,
|
| 15567 |
+
"loss": 0.1552,
|
| 15568 |
+
"step": 2218
|
| 15569 |
+
},
|
| 15570 |
+
{
|
| 15571 |
+
"epoch": 4.402777777777778,
|
| 15572 |
+
"grad_norm": 143.18032836914062,
|
| 15573 |
+
"learning_rate": 7.475247524752475e-06,
|
| 15574 |
+
"loss": 0.1426,
|
| 15575 |
+
"step": 2219
|
| 15576 |
+
},
|
| 15577 |
+
{
|
| 15578 |
+
"epoch": 4.404761904761905,
|
| 15579 |
+
"grad_norm": 257.44561767578125,
|
| 15580 |
+
"learning_rate": 7.4504950495049504e-06,
|
| 15581 |
+
"loss": 0.1281,
|
| 15582 |
+
"step": 2220
|
| 15583 |
+
},
|
| 15584 |
+
{
|
| 15585 |
+
"epoch": 4.406746031746032,
|
| 15586 |
+
"grad_norm": 837.1688232421875,
|
| 15587 |
+
"learning_rate": 7.4257425742574256e-06,
|
| 15588 |
+
"loss": 0.1554,
|
| 15589 |
+
"step": 2221
|
| 15590 |
+
},
|
| 15591 |
+
{
|
| 15592 |
+
"epoch": 4.408730158730159,
|
| 15593 |
+
"grad_norm": 264.7164306640625,
|
| 15594 |
+
"learning_rate": 7.400990099009901e-06,
|
| 15595 |
+
"loss": 0.1293,
|
| 15596 |
+
"step": 2222
|
| 15597 |
+
},
|
| 15598 |
+
{
|
| 15599 |
+
"epoch": 4.410714285714286,
|
| 15600 |
+
"grad_norm": 329.1595458984375,
|
| 15601 |
+
"learning_rate": 7.376237623762376e-06,
|
| 15602 |
+
"loss": 0.1355,
|
| 15603 |
+
"step": 2223
|
| 15604 |
+
},
|
| 15605 |
+
{
|
| 15606 |
+
"epoch": 4.412698412698413,
|
| 15607 |
+
"grad_norm": 268.7605285644531,
|
| 15608 |
+
"learning_rate": 7.351485148514852e-06,
|
| 15609 |
+
"loss": 0.1138,
|
| 15610 |
+
"step": 2224
|
| 15611 |
+
},
|
| 15612 |
+
{
|
| 15613 |
+
"epoch": 4.4146825396825395,
|
| 15614 |
+
"grad_norm": 498.3409729003906,
|
| 15615 |
+
"learning_rate": 7.326732673267327e-06,
|
| 15616 |
+
"loss": 0.1474,
|
| 15617 |
+
"step": 2225
|
| 15618 |
+
},
|
| 15619 |
+
{
|
| 15620 |
+
"epoch": 4.416666666666667,
|
| 15621 |
+
"grad_norm": 561.4034423828125,
|
| 15622 |
+
"learning_rate": 7.301980198019802e-06,
|
| 15623 |
+
"loss": 0.1173,
|
| 15624 |
+
"step": 2226
|
| 15625 |
+
},
|
| 15626 |
+
{
|
| 15627 |
+
"epoch": 4.4186507936507935,
|
| 15628 |
+
"grad_norm": 1570.9210205078125,
|
| 15629 |
+
"learning_rate": 7.277227722772277e-06,
|
| 15630 |
+
"loss": 0.2825,
|
| 15631 |
+
"step": 2227
|
| 15632 |
+
},
|
| 15633 |
+
{
|
| 15634 |
+
"epoch": 4.420634920634921,
|
| 15635 |
+
"grad_norm": 1429.0875244140625,
|
| 15636 |
+
"learning_rate": 7.252475247524752e-06,
|
| 15637 |
+
"loss": 0.1602,
|
| 15638 |
+
"step": 2228
|
| 15639 |
+
},
|
| 15640 |
+
{
|
| 15641 |
+
"epoch": 4.4226190476190474,
|
| 15642 |
+
"grad_norm": 175.19955444335938,
|
| 15643 |
+
"learning_rate": 7.227722772277227e-06,
|
| 15644 |
+
"loss": 0.1221,
|
| 15645 |
+
"step": 2229
|
| 15646 |
+
},
|
| 15647 |
+
{
|
| 15648 |
+
"epoch": 4.424603174603175,
|
| 15649 |
+
"grad_norm": 438.46832275390625,
|
| 15650 |
+
"learning_rate": 7.2029702970297025e-06,
|
| 15651 |
+
"loss": 0.1427,
|
| 15652 |
+
"step": 2230
|
| 15653 |
+
},
|
| 15654 |
+
{
|
| 15655 |
+
"epoch": 4.426587301587301,
|
| 15656 |
+
"grad_norm": 416.7275085449219,
|
| 15657 |
+
"learning_rate": 7.178217821782178e-06,
|
| 15658 |
+
"loss": 0.1591,
|
| 15659 |
+
"step": 2231
|
| 15660 |
+
},
|
| 15661 |
+
{
|
| 15662 |
+
"epoch": 4.428571428571429,
|
| 15663 |
+
"grad_norm": 2534.41845703125,
|
| 15664 |
+
"learning_rate": 7.153465346534654e-06,
|
| 15665 |
+
"loss": 0.1247,
|
| 15666 |
+
"step": 2232
|
| 15667 |
+
},
|
| 15668 |
+
{
|
| 15669 |
+
"epoch": 4.430555555555555,
|
| 15670 |
+
"grad_norm": 100.64867401123047,
|
| 15671 |
+
"learning_rate": 7.128712871287129e-06,
|
| 15672 |
+
"loss": 0.1625,
|
| 15673 |
+
"step": 2233
|
| 15674 |
+
},
|
| 15675 |
+
{
|
| 15676 |
+
"epoch": 4.432539682539683,
|
| 15677 |
+
"grad_norm": 581.3128051757812,
|
| 15678 |
+
"learning_rate": 7.103960396039604e-06,
|
| 15679 |
+
"loss": 0.1076,
|
| 15680 |
+
"step": 2234
|
| 15681 |
+
},
|
| 15682 |
+
{
|
| 15683 |
+
"epoch": 4.434523809523809,
|
| 15684 |
+
"grad_norm": 38.92487716674805,
|
| 15685 |
+
"learning_rate": 7.079207920792079e-06,
|
| 15686 |
+
"loss": 0.113,
|
| 15687 |
+
"step": 2235
|
| 15688 |
+
},
|
| 15689 |
+
{
|
| 15690 |
+
"epoch": 4.436507936507937,
|
| 15691 |
+
"grad_norm": 207.40234375,
|
| 15692 |
+
"learning_rate": 7.054455445544554e-06,
|
| 15693 |
+
"loss": 0.111,
|
| 15694 |
+
"step": 2236
|
| 15695 |
+
},
|
| 15696 |
+
{
|
| 15697 |
+
"epoch": 4.438492063492063,
|
| 15698 |
+
"grad_norm": 129.4462432861328,
|
| 15699 |
+
"learning_rate": 7.029702970297031e-06,
|
| 15700 |
+
"loss": 0.1554,
|
| 15701 |
+
"step": 2237
|
| 15702 |
+
},
|
| 15703 |
+
{
|
| 15704 |
+
"epoch": 4.440476190476191,
|
| 15705 |
+
"grad_norm": 359.9278259277344,
|
| 15706 |
+
"learning_rate": 7.004950495049506e-06,
|
| 15707 |
+
"loss": 0.1738,
|
| 15708 |
+
"step": 2238
|
| 15709 |
+
},
|
| 15710 |
+
{
|
| 15711 |
+
"epoch": 4.442460317460317,
|
| 15712 |
+
"grad_norm": 313.71588134765625,
|
| 15713 |
+
"learning_rate": 6.980198019801981e-06,
|
| 15714 |
+
"loss": 0.1718,
|
| 15715 |
+
"step": 2239
|
| 15716 |
+
},
|
| 15717 |
+
{
|
| 15718 |
+
"epoch": 4.444444444444445,
|
| 15719 |
+
"grad_norm": 978.9659423828125,
|
| 15720 |
+
"learning_rate": 6.955445544554456e-06,
|
| 15721 |
+
"loss": 0.1429,
|
| 15722 |
+
"step": 2240
|
| 15723 |
+
},
|
| 15724 |
+
{
|
| 15725 |
+
"epoch": 4.446428571428571,
|
| 15726 |
+
"grad_norm": 175.38475036621094,
|
| 15727 |
+
"learning_rate": 6.9306930693069314e-06,
|
| 15728 |
+
"loss": 0.1216,
|
| 15729 |
+
"step": 2241
|
| 15730 |
+
},
|
| 15731 |
+
{
|
| 15732 |
+
"epoch": 4.448412698412699,
|
| 15733 |
+
"grad_norm": 173.55010986328125,
|
| 15734 |
+
"learning_rate": 6.9059405940594066e-06,
|
| 15735 |
+
"loss": 0.1308,
|
| 15736 |
+
"step": 2242
|
| 15737 |
+
},
|
| 15738 |
+
{
|
| 15739 |
+
"epoch": 4.450396825396825,
|
| 15740 |
+
"grad_norm": 123.02428436279297,
|
| 15741 |
+
"learning_rate": 6.881188118811882e-06,
|
| 15742 |
+
"loss": 0.1094,
|
| 15743 |
+
"step": 2243
|
| 15744 |
+
},
|
| 15745 |
+
{
|
| 15746 |
+
"epoch": 4.4523809523809526,
|
| 15747 |
+
"grad_norm": 53.08387756347656,
|
| 15748 |
+
"learning_rate": 6.856435643564358e-06,
|
| 15749 |
+
"loss": 0.1572,
|
| 15750 |
+
"step": 2244
|
| 15751 |
+
},
|
| 15752 |
+
{
|
| 15753 |
+
"epoch": 4.454365079365079,
|
| 15754 |
+
"grad_norm": 566.4531860351562,
|
| 15755 |
+
"learning_rate": 6.831683168316833e-06,
|
| 15756 |
+
"loss": 0.1642,
|
| 15757 |
+
"step": 2245
|
| 15758 |
+
},
|
| 15759 |
+
{
|
| 15760 |
+
"epoch": 4.4563492063492065,
|
| 15761 |
+
"grad_norm": 497.5733642578125,
|
| 15762 |
+
"learning_rate": 6.806930693069308e-06,
|
| 15763 |
+
"loss": 0.1177,
|
| 15764 |
+
"step": 2246
|
| 15765 |
+
},
|
| 15766 |
+
{
|
| 15767 |
+
"epoch": 4.458333333333333,
|
| 15768 |
+
"grad_norm": 380.09173583984375,
|
| 15769 |
+
"learning_rate": 6.782178217821783e-06,
|
| 15770 |
+
"loss": 0.1044,
|
| 15771 |
+
"step": 2247
|
| 15772 |
+
},
|
| 15773 |
+
{
|
| 15774 |
+
"epoch": 4.4603174603174605,
|
| 15775 |
+
"grad_norm": 335.00799560546875,
|
| 15776 |
+
"learning_rate": 6.757425742574258e-06,
|
| 15777 |
+
"loss": 0.1402,
|
| 15778 |
+
"step": 2248
|
| 15779 |
+
},
|
| 15780 |
+
{
|
| 15781 |
+
"epoch": 4.462301587301587,
|
| 15782 |
+
"grad_norm": 321.2219543457031,
|
| 15783 |
+
"learning_rate": 6.732673267326733e-06,
|
| 15784 |
+
"loss": 0.1245,
|
| 15785 |
+
"step": 2249
|
| 15786 |
+
},
|
| 15787 |
+
{
|
| 15788 |
+
"epoch": 4.464285714285714,
|
| 15789 |
+
"grad_norm": 541.3793334960938,
|
| 15790 |
+
"learning_rate": 6.707920792079208e-06,
|
| 15791 |
+
"loss": 0.1438,
|
| 15792 |
+
"step": 2250
|
| 15793 |
+
},
|
| 15794 |
+
{
|
| 15795 |
+
"epoch": 4.466269841269841,
|
| 15796 |
+
"grad_norm": 117.61126708984375,
|
| 15797 |
+
"learning_rate": 6.6831683168316835e-06,
|
| 15798 |
+
"loss": 0.1041,
|
| 15799 |
+
"step": 2251
|
| 15800 |
+
},
|
| 15801 |
+
{
|
| 15802 |
+
"epoch": 4.468253968253968,
|
| 15803 |
+
"grad_norm": 24.132179260253906,
|
| 15804 |
+
"learning_rate": 6.6584158415841595e-06,
|
| 15805 |
+
"loss": 0.1235,
|
| 15806 |
+
"step": 2252
|
| 15807 |
+
},
|
| 15808 |
+
{
|
| 15809 |
+
"epoch": 4.470238095238095,
|
| 15810 |
+
"grad_norm": 257.79510498046875,
|
| 15811 |
+
"learning_rate": 6.633663366336635e-06,
|
| 15812 |
+
"loss": 0.1234,
|
| 15813 |
+
"step": 2253
|
| 15814 |
+
},
|
| 15815 |
+
{
|
| 15816 |
+
"epoch": 4.472222222222222,
|
| 15817 |
+
"grad_norm": 394.21368408203125,
|
| 15818 |
+
"learning_rate": 6.60891089108911e-06,
|
| 15819 |
+
"loss": 0.1579,
|
| 15820 |
+
"step": 2254
|
| 15821 |
+
},
|
| 15822 |
+
{
|
| 15823 |
+
"epoch": 4.474206349206349,
|
| 15824 |
+
"grad_norm": 148.26023864746094,
|
| 15825 |
+
"learning_rate": 6.584158415841585e-06,
|
| 15826 |
+
"loss": 0.121,
|
| 15827 |
+
"step": 2255
|
| 15828 |
+
},
|
| 15829 |
+
{
|
| 15830 |
+
"epoch": 4.476190476190476,
|
| 15831 |
+
"grad_norm": 167.20648193359375,
|
| 15832 |
+
"learning_rate": 6.55940594059406e-06,
|
| 15833 |
+
"loss": 0.1339,
|
| 15834 |
+
"step": 2256
|
| 15835 |
+
},
|
| 15836 |
+
{
|
| 15837 |
+
"epoch": 4.478174603174603,
|
| 15838 |
+
"grad_norm": 198.91152954101562,
|
| 15839 |
+
"learning_rate": 6.534653465346535e-06,
|
| 15840 |
+
"loss": 0.1223,
|
| 15841 |
+
"step": 2257
|
| 15842 |
+
},
|
| 15843 |
+
{
|
| 15844 |
+
"epoch": 4.48015873015873,
|
| 15845 |
+
"grad_norm": 116.70048522949219,
|
| 15846 |
+
"learning_rate": 6.50990099009901e-06,
|
| 15847 |
+
"loss": 0.1927,
|
| 15848 |
+
"step": 2258
|
| 15849 |
+
},
|
| 15850 |
+
{
|
| 15851 |
+
"epoch": 4.482142857142857,
|
| 15852 |
+
"grad_norm": 155.2935791015625,
|
| 15853 |
+
"learning_rate": 6.485148514851485e-06,
|
| 15854 |
+
"loss": 0.1422,
|
| 15855 |
+
"step": 2259
|
| 15856 |
+
},
|
| 15857 |
+
{
|
| 15858 |
+
"epoch": 4.484126984126984,
|
| 15859 |
+
"grad_norm": 269.98248291015625,
|
| 15860 |
+
"learning_rate": 6.4603960396039605e-06,
|
| 15861 |
+
"loss": 0.1306,
|
| 15862 |
+
"step": 2260
|
| 15863 |
+
},
|
| 15864 |
+
{
|
| 15865 |
+
"epoch": 4.486111111111111,
|
| 15866 |
+
"grad_norm": 157.9420166015625,
|
| 15867 |
+
"learning_rate": 6.4356435643564364e-06,
|
| 15868 |
+
"loss": 0.1253,
|
| 15869 |
+
"step": 2261
|
| 15870 |
+
},
|
| 15871 |
+
{
|
| 15872 |
+
"epoch": 4.488095238095238,
|
| 15873 |
+
"grad_norm": 215.1379852294922,
|
| 15874 |
+
"learning_rate": 6.4108910891089116e-06,
|
| 15875 |
+
"loss": 0.2573,
|
| 15876 |
+
"step": 2262
|
| 15877 |
+
},
|
| 15878 |
+
{
|
| 15879 |
+
"epoch": 4.490079365079365,
|
| 15880 |
+
"grad_norm": 985.1053466796875,
|
| 15881 |
+
"learning_rate": 6.386138613861387e-06,
|
| 15882 |
+
"loss": 0.1579,
|
| 15883 |
+
"step": 2263
|
| 15884 |
+
},
|
| 15885 |
+
{
|
| 15886 |
+
"epoch": 4.492063492063492,
|
| 15887 |
+
"grad_norm": 1822.2581787109375,
|
| 15888 |
+
"learning_rate": 6.361386138613862e-06,
|
| 15889 |
+
"loss": 0.113,
|
| 15890 |
+
"step": 2264
|
| 15891 |
+
},
|
| 15892 |
+
{
|
| 15893 |
+
"epoch": 4.494047619047619,
|
| 15894 |
+
"grad_norm": 136.7965087890625,
|
| 15895 |
+
"learning_rate": 6.336633663366337e-06,
|
| 15896 |
+
"loss": 0.143,
|
| 15897 |
+
"step": 2265
|
| 15898 |
+
},
|
| 15899 |
+
{
|
| 15900 |
+
"epoch": 4.496031746031746,
|
| 15901 |
+
"grad_norm": 458.3509826660156,
|
| 15902 |
+
"learning_rate": 6.311881188118812e-06,
|
| 15903 |
+
"loss": 0.1568,
|
| 15904 |
+
"step": 2266
|
| 15905 |
+
},
|
| 15906 |
+
{
|
| 15907 |
+
"epoch": 4.4980158730158735,
|
| 15908 |
+
"grad_norm": 704.4259033203125,
|
| 15909 |
+
"learning_rate": 6.287128712871287e-06,
|
| 15910 |
+
"loss": 0.1032,
|
| 15911 |
+
"step": 2267
|
| 15912 |
+
},
|
| 15913 |
+
{
|
| 15914 |
+
"epoch": 4.5,
|
| 15915 |
+
"grad_norm": 3009.530029296875,
|
| 15916 |
+
"learning_rate": 6.262376237623762e-06,
|
| 15917 |
+
"loss": 0.1235,
|
| 15918 |
+
"step": 2268
|
| 15919 |
+
},
|
| 15920 |
+
{
|
| 15921 |
+
"epoch": 4.5019841269841265,
|
| 15922 |
+
"grad_norm": 452.6950988769531,
|
| 15923 |
+
"learning_rate": 6.237623762376238e-06,
|
| 15924 |
+
"loss": 0.1471,
|
| 15925 |
+
"step": 2269
|
| 15926 |
+
},
|
| 15927 |
+
{
|
| 15928 |
+
"epoch": 4.503968253968254,
|
| 15929 |
+
"grad_norm": 27.798940658569336,
|
| 15930 |
+
"learning_rate": 6.212871287128713e-06,
|
| 15931 |
+
"loss": 0.0905,
|
| 15932 |
+
"step": 2270
|
| 15933 |
+
},
|
| 15934 |
+
{
|
| 15935 |
+
"epoch": 4.505952380952381,
|
| 15936 |
+
"grad_norm": 195.69998168945312,
|
| 15937 |
+
"learning_rate": 6.1881188118811885e-06,
|
| 15938 |
+
"loss": 0.1214,
|
| 15939 |
+
"step": 2271
|
| 15940 |
+
},
|
| 15941 |
+
{
|
| 15942 |
+
"epoch": 4.507936507936508,
|
| 15943 |
+
"grad_norm": 106.46649932861328,
|
| 15944 |
+
"learning_rate": 6.163366336633664e-06,
|
| 15945 |
+
"loss": 0.1322,
|
| 15946 |
+
"step": 2272
|
| 15947 |
+
},
|
| 15948 |
+
{
|
| 15949 |
+
"epoch": 4.509920634920634,
|
| 15950 |
+
"grad_norm": 20.548770904541016,
|
| 15951 |
+
"learning_rate": 6.138613861386139e-06,
|
| 15952 |
+
"loss": 0.1033,
|
| 15953 |
+
"step": 2273
|
| 15954 |
+
},
|
| 15955 |
+
{
|
| 15956 |
+
"epoch": 4.511904761904762,
|
| 15957 |
+
"grad_norm": 332.4073486328125,
|
| 15958 |
+
"learning_rate": 6.113861386138614e-06,
|
| 15959 |
+
"loss": 0.115,
|
| 15960 |
+
"step": 2274
|
| 15961 |
+
},
|
| 15962 |
+
{
|
| 15963 |
+
"epoch": 4.513888888888889,
|
| 15964 |
+
"grad_norm": 174.1964569091797,
|
| 15965 |
+
"learning_rate": 6.089108910891089e-06,
|
| 15966 |
+
"loss": 0.1034,
|
| 15967 |
+
"step": 2275
|
| 15968 |
+
},
|
| 15969 |
+
{
|
| 15970 |
+
"epoch": 4.515873015873016,
|
| 15971 |
+
"grad_norm": 430.2513122558594,
|
| 15972 |
+
"learning_rate": 6.064356435643564e-06,
|
| 15973 |
+
"loss": 0.1322,
|
| 15974 |
+
"step": 2276
|
| 15975 |
+
},
|
| 15976 |
+
{
|
| 15977 |
+
"epoch": 4.517857142857143,
|
| 15978 |
+
"grad_norm": 475.7851257324219,
|
| 15979 |
+
"learning_rate": 6.03960396039604e-06,
|
| 15980 |
+
"loss": 0.2077,
|
| 15981 |
+
"step": 2277
|
| 15982 |
+
},
|
| 15983 |
+
{
|
| 15984 |
+
"epoch": 4.51984126984127,
|
| 15985 |
+
"grad_norm": 121.18181610107422,
|
| 15986 |
+
"learning_rate": 6.014851485148515e-06,
|
| 15987 |
+
"loss": 0.1376,
|
| 15988 |
+
"step": 2278
|
| 15989 |
+
},
|
| 15990 |
+
{
|
| 15991 |
+
"epoch": 4.521825396825397,
|
| 15992 |
+
"grad_norm": 171.60646057128906,
|
| 15993 |
+
"learning_rate": 5.99009900990099e-06,
|
| 15994 |
+
"loss": 0.1372,
|
| 15995 |
+
"step": 2279
|
| 15996 |
+
},
|
| 15997 |
+
{
|
| 15998 |
+
"epoch": 4.523809523809524,
|
| 15999 |
+
"grad_norm": 133.42994689941406,
|
| 16000 |
+
"learning_rate": 5.9653465346534655e-06,
|
| 16001 |
+
"loss": 0.1207,
|
| 16002 |
+
"step": 2280
|
| 16003 |
+
},
|
| 16004 |
+
{
|
| 16005 |
+
"epoch": 4.525793650793651,
|
| 16006 |
+
"grad_norm": 8379.7734375,
|
| 16007 |
+
"learning_rate": 5.940594059405941e-06,
|
| 16008 |
+
"loss": 0.1165,
|
| 16009 |
+
"step": 2281
|
| 16010 |
+
},
|
| 16011 |
+
{
|
| 16012 |
+
"epoch": 4.527777777777778,
|
| 16013 |
+
"grad_norm": 49.69838333129883,
|
| 16014 |
+
"learning_rate": 5.915841584158416e-06,
|
| 16015 |
+
"loss": 0.111,
|
| 16016 |
+
"step": 2282
|
| 16017 |
+
},
|
| 16018 |
+
{
|
| 16019 |
+
"epoch": 4.529761904761905,
|
| 16020 |
+
"grad_norm": 320.11212158203125,
|
| 16021 |
+
"learning_rate": 5.891089108910891e-06,
|
| 16022 |
+
"loss": 0.1246,
|
| 16023 |
+
"step": 2283
|
| 16024 |
+
},
|
| 16025 |
+
{
|
| 16026 |
+
"epoch": 4.531746031746032,
|
| 16027 |
+
"grad_norm": 168.22882080078125,
|
| 16028 |
+
"learning_rate": 5.866336633663366e-06,
|
| 16029 |
+
"loss": 0.1113,
|
| 16030 |
+
"step": 2284
|
| 16031 |
+
},
|
| 16032 |
+
{
|
| 16033 |
+
"epoch": 4.533730158730159,
|
| 16034 |
+
"grad_norm": 76.41687774658203,
|
| 16035 |
+
"learning_rate": 5.841584158415842e-06,
|
| 16036 |
+
"loss": 0.1716,
|
| 16037 |
+
"step": 2285
|
| 16038 |
+
},
|
| 16039 |
+
{
|
| 16040 |
+
"epoch": 4.535714285714286,
|
| 16041 |
+
"grad_norm": 260.0563049316406,
|
| 16042 |
+
"learning_rate": 5.816831683168317e-06,
|
| 16043 |
+
"loss": 0.1488,
|
| 16044 |
+
"step": 2286
|
| 16045 |
+
},
|
| 16046 |
+
{
|
| 16047 |
+
"epoch": 4.537698412698413,
|
| 16048 |
+
"grad_norm": 1066.672607421875,
|
| 16049 |
+
"learning_rate": 5.792079207920793e-06,
|
| 16050 |
+
"loss": 0.1143,
|
| 16051 |
+
"step": 2287
|
| 16052 |
+
},
|
| 16053 |
+
{
|
| 16054 |
+
"epoch": 4.5396825396825395,
|
| 16055 |
+
"grad_norm": 18.335147857666016,
|
| 16056 |
+
"learning_rate": 5.767326732673268e-06,
|
| 16057 |
+
"loss": 0.0949,
|
| 16058 |
+
"step": 2288
|
| 16059 |
+
},
|
| 16060 |
+
{
|
| 16061 |
+
"epoch": 4.541666666666667,
|
| 16062 |
+
"grad_norm": 52.63270950317383,
|
| 16063 |
+
"learning_rate": 5.742574257425743e-06,
|
| 16064 |
+
"loss": 0.1196,
|
| 16065 |
+
"step": 2289
|
| 16066 |
+
},
|
| 16067 |
+
{
|
| 16068 |
+
"epoch": 4.5436507936507935,
|
| 16069 |
+
"grad_norm": 71.53189849853516,
|
| 16070 |
+
"learning_rate": 5.7178217821782184e-06,
|
| 16071 |
+
"loss": 0.1113,
|
| 16072 |
+
"step": 2290
|
| 16073 |
+
},
|
| 16074 |
+
{
|
| 16075 |
+
"epoch": 4.545634920634921,
|
| 16076 |
+
"grad_norm": 87.72516632080078,
|
| 16077 |
+
"learning_rate": 5.6930693069306936e-06,
|
| 16078 |
+
"loss": 0.1445,
|
| 16079 |
+
"step": 2291
|
| 16080 |
+
},
|
| 16081 |
+
{
|
| 16082 |
+
"epoch": 4.5476190476190474,
|
| 16083 |
+
"grad_norm": 45.04298400878906,
|
| 16084 |
+
"learning_rate": 5.668316831683169e-06,
|
| 16085 |
+
"loss": 0.0924,
|
| 16086 |
+
"step": 2292
|
| 16087 |
+
},
|
| 16088 |
+
{
|
| 16089 |
+
"epoch": 4.549603174603175,
|
| 16090 |
+
"grad_norm": 3194.39208984375,
|
| 16091 |
+
"learning_rate": 5.643564356435644e-06,
|
| 16092 |
+
"loss": 0.1177,
|
| 16093 |
+
"step": 2293
|
| 16094 |
+
},
|
| 16095 |
+
{
|
| 16096 |
+
"epoch": 4.551587301587301,
|
| 16097 |
+
"grad_norm": 59.8870849609375,
|
| 16098 |
+
"learning_rate": 5.618811881188119e-06,
|
| 16099 |
+
"loss": 0.126,
|
| 16100 |
+
"step": 2294
|
| 16101 |
+
},
|
| 16102 |
+
{
|
| 16103 |
+
"epoch": 4.553571428571429,
|
| 16104 |
+
"grad_norm": 222.3744659423828,
|
| 16105 |
+
"learning_rate": 5.594059405940594e-06,
|
| 16106 |
+
"loss": 0.1124,
|
| 16107 |
+
"step": 2295
|
| 16108 |
+
},
|
| 16109 |
+
{
|
| 16110 |
+
"epoch": 4.555555555555555,
|
| 16111 |
+
"grad_norm": 297.43646240234375,
|
| 16112 |
+
"learning_rate": 5.56930693069307e-06,
|
| 16113 |
+
"loss": 0.1141,
|
| 16114 |
+
"step": 2296
|
| 16115 |
+
},
|
| 16116 |
+
{
|
| 16117 |
+
"epoch": 4.557539682539683,
|
| 16118 |
+
"grad_norm": 277.83306884765625,
|
| 16119 |
+
"learning_rate": 5.544554455445545e-06,
|
| 16120 |
+
"loss": 0.1137,
|
| 16121 |
+
"step": 2297
|
| 16122 |
+
},
|
| 16123 |
+
{
|
| 16124 |
+
"epoch": 4.559523809523809,
|
| 16125 |
+
"grad_norm": 115.47888946533203,
|
| 16126 |
+
"learning_rate": 5.51980198019802e-06,
|
| 16127 |
+
"loss": 0.1523,
|
| 16128 |
+
"step": 2298
|
| 16129 |
+
},
|
| 16130 |
+
{
|
| 16131 |
+
"epoch": 4.561507936507937,
|
| 16132 |
+
"grad_norm": 176.40481567382812,
|
| 16133 |
+
"learning_rate": 5.495049504950495e-06,
|
| 16134 |
+
"loss": 0.0958,
|
| 16135 |
+
"step": 2299
|
| 16136 |
+
},
|
| 16137 |
+
{
|
| 16138 |
+
"epoch": 4.563492063492063,
|
| 16139 |
+
"grad_norm": 14.159869194030762,
|
| 16140 |
+
"learning_rate": 5.4702970297029705e-06,
|
| 16141 |
+
"loss": 0.1058,
|
| 16142 |
+
"step": 2300
|
| 16143 |
+
},
|
| 16144 |
+
{
|
| 16145 |
+
"epoch": 4.565476190476191,
|
| 16146 |
+
"grad_norm": 39.07661056518555,
|
| 16147 |
+
"learning_rate": 5.445544554455446e-06,
|
| 16148 |
+
"loss": 0.0995,
|
| 16149 |
+
"step": 2301
|
| 16150 |
+
},
|
| 16151 |
+
{
|
| 16152 |
+
"epoch": 4.567460317460317,
|
| 16153 |
+
"grad_norm": 50.13186264038086,
|
| 16154 |
+
"learning_rate": 5.420792079207921e-06,
|
| 16155 |
+
"loss": 0.1016,
|
| 16156 |
+
"step": 2302
|
| 16157 |
+
},
|
| 16158 |
+
{
|
| 16159 |
+
"epoch": 4.569444444444445,
|
| 16160 |
+
"grad_norm": 435.141357421875,
|
| 16161 |
+
"learning_rate": 5.396039603960396e-06,
|
| 16162 |
+
"loss": 0.1255,
|
| 16163 |
+
"step": 2303
|
| 16164 |
+
},
|
| 16165 |
+
{
|
| 16166 |
+
"epoch": 4.571428571428571,
|
| 16167 |
+
"grad_norm": 268.2369079589844,
|
| 16168 |
+
"learning_rate": 5.371287128712872e-06,
|
| 16169 |
+
"loss": 0.0972,
|
| 16170 |
+
"step": 2304
|
| 16171 |
+
},
|
| 16172 |
+
{
|
| 16173 |
+
"epoch": 4.573412698412699,
|
| 16174 |
+
"grad_norm": 75.26114654541016,
|
| 16175 |
+
"learning_rate": 5.346534653465347e-06,
|
| 16176 |
+
"loss": 0.1306,
|
| 16177 |
+
"step": 2305
|
| 16178 |
+
},
|
| 16179 |
+
{
|
| 16180 |
+
"epoch": 4.575396825396825,
|
| 16181 |
+
"grad_norm": 90.34429168701172,
|
| 16182 |
+
"learning_rate": 5.321782178217822e-06,
|
| 16183 |
+
"loss": 0.129,
|
| 16184 |
+
"step": 2306
|
| 16185 |
+
},
|
| 16186 |
+
{
|
| 16187 |
+
"epoch": 4.5773809523809526,
|
| 16188 |
+
"grad_norm": 679.5048217773438,
|
| 16189 |
+
"learning_rate": 5.297029702970297e-06,
|
| 16190 |
+
"loss": 0.1431,
|
| 16191 |
+
"step": 2307
|
| 16192 |
+
},
|
| 16193 |
+
{
|
| 16194 |
+
"epoch": 4.579365079365079,
|
| 16195 |
+
"grad_norm": 339.6603088378906,
|
| 16196 |
+
"learning_rate": 5.272277227722772e-06,
|
| 16197 |
+
"loss": 0.1247,
|
| 16198 |
+
"step": 2308
|
| 16199 |
+
},
|
| 16200 |
+
{
|
| 16201 |
+
"epoch": 4.5813492063492065,
|
| 16202 |
+
"grad_norm": 3.0373775959014893,
|
| 16203 |
+
"learning_rate": 5.2475247524752475e-06,
|
| 16204 |
+
"loss": 0.1174,
|
| 16205 |
+
"step": 2309
|
| 16206 |
+
},
|
| 16207 |
+
{
|
| 16208 |
+
"epoch": 4.583333333333333,
|
| 16209 |
+
"grad_norm": 66.98102569580078,
|
| 16210 |
+
"learning_rate": 5.222772277227723e-06,
|
| 16211 |
+
"loss": 0.1573,
|
| 16212 |
+
"step": 2310
|
| 16213 |
+
},
|
| 16214 |
+
{
|
| 16215 |
+
"epoch": 4.5853174603174605,
|
| 16216 |
+
"grad_norm": 579.1975708007812,
|
| 16217 |
+
"learning_rate": 5.198019801980198e-06,
|
| 16218 |
+
"loss": 0.1082,
|
| 16219 |
+
"step": 2311
|
| 16220 |
+
},
|
| 16221 |
+
{
|
| 16222 |
+
"epoch": 4.587301587301587,
|
| 16223 |
+
"grad_norm": 112.84606170654297,
|
| 16224 |
+
"learning_rate": 5.173267326732673e-06,
|
| 16225 |
+
"loss": 0.0945,
|
| 16226 |
+
"step": 2312
|
| 16227 |
+
},
|
| 16228 |
+
{
|
| 16229 |
+
"epoch": 4.589285714285714,
|
| 16230 |
+
"grad_norm": 65.5484848022461,
|
| 16231 |
+
"learning_rate": 5.148514851485149e-06,
|
| 16232 |
+
"loss": 0.1919,
|
| 16233 |
+
"step": 2313
|
| 16234 |
+
},
|
| 16235 |
+
{
|
| 16236 |
+
"epoch": 4.591269841269841,
|
| 16237 |
+
"grad_norm": 181.4154052734375,
|
| 16238 |
+
"learning_rate": 5.123762376237624e-06,
|
| 16239 |
+
"loss": 0.1193,
|
| 16240 |
+
"step": 2314
|
| 16241 |
+
},
|
| 16242 |
+
{
|
| 16243 |
+
"epoch": 4.593253968253968,
|
| 16244 |
+
"grad_norm": 415.4324951171875,
|
| 16245 |
+
"learning_rate": 5.099009900990099e-06,
|
| 16246 |
+
"loss": 0.1377,
|
| 16247 |
+
"step": 2315
|
| 16248 |
+
},
|
| 16249 |
+
{
|
| 16250 |
+
"epoch": 4.595238095238095,
|
| 16251 |
+
"grad_norm": 75.32178497314453,
|
| 16252 |
+
"learning_rate": 5.074257425742575e-06,
|
| 16253 |
+
"loss": 0.0959,
|
| 16254 |
+
"step": 2316
|
| 16255 |
+
},
|
| 16256 |
+
{
|
| 16257 |
+
"epoch": 4.597222222222222,
|
| 16258 |
+
"grad_norm": 52.82908248901367,
|
| 16259 |
+
"learning_rate": 5.04950495049505e-06,
|
| 16260 |
+
"loss": 0.1032,
|
| 16261 |
+
"step": 2317
|
| 16262 |
+
},
|
| 16263 |
+
{
|
| 16264 |
+
"epoch": 4.599206349206349,
|
| 16265 |
+
"grad_norm": 16.026365280151367,
|
| 16266 |
+
"learning_rate": 5.024752475247525e-06,
|
| 16267 |
+
"loss": 0.1142,
|
| 16268 |
+
"step": 2318
|
| 16269 |
+
},
|
| 16270 |
+
{
|
| 16271 |
+
"epoch": 4.601190476190476,
|
| 16272 |
+
"grad_norm": 157.17193603515625,
|
| 16273 |
+
"learning_rate": 5e-06,
|
| 16274 |
+
"loss": 0.2347,
|
| 16275 |
+
"step": 2319
|
| 16276 |
+
},
|
| 16277 |
+
{
|
| 16278 |
+
"epoch": 4.603174603174603,
|
| 16279 |
+
"grad_norm": 57.173553466796875,
|
| 16280 |
+
"learning_rate": 4.9752475247524755e-06,
|
| 16281 |
+
"loss": 0.1078,
|
| 16282 |
+
"step": 2320
|
| 16283 |
+
},
|
| 16284 |
+
{
|
| 16285 |
+
"epoch": 4.60515873015873,
|
| 16286 |
+
"grad_norm": 85.49403381347656,
|
| 16287 |
+
"learning_rate": 4.950495049504951e-06,
|
| 16288 |
+
"loss": 0.1282,
|
| 16289 |
+
"step": 2321
|
| 16290 |
+
},
|
| 16291 |
+
{
|
| 16292 |
+
"epoch": 4.607142857142857,
|
| 16293 |
+
"grad_norm": 6154.20703125,
|
| 16294 |
+
"learning_rate": 4.925742574257426e-06,
|
| 16295 |
+
"loss": 0.1476,
|
| 16296 |
+
"step": 2322
|
| 16297 |
+
},
|
| 16298 |
+
{
|
| 16299 |
+
"epoch": 4.609126984126984,
|
| 16300 |
+
"grad_norm": 231.51145935058594,
|
| 16301 |
+
"learning_rate": 4.900990099009902e-06,
|
| 16302 |
+
"loss": 0.1227,
|
| 16303 |
+
"step": 2323
|
| 16304 |
+
},
|
| 16305 |
+
{
|
| 16306 |
+
"epoch": 4.611111111111111,
|
| 16307 |
+
"grad_norm": 1642.306640625,
|
| 16308 |
+
"learning_rate": 4.876237623762377e-06,
|
| 16309 |
+
"loss": 0.1544,
|
| 16310 |
+
"step": 2324
|
| 16311 |
+
},
|
| 16312 |
+
{
|
| 16313 |
+
"epoch": 4.613095238095238,
|
| 16314 |
+
"grad_norm": 42.71038055419922,
|
| 16315 |
+
"learning_rate": 4.851485148514852e-06,
|
| 16316 |
+
"loss": 0.1104,
|
| 16317 |
+
"step": 2325
|
| 16318 |
+
},
|
| 16319 |
+
{
|
| 16320 |
+
"epoch": 4.615079365079366,
|
| 16321 |
+
"grad_norm": 80.55039978027344,
|
| 16322 |
+
"learning_rate": 4.826732673267327e-06,
|
| 16323 |
+
"loss": 0.1074,
|
| 16324 |
+
"step": 2326
|
| 16325 |
+
},
|
| 16326 |
+
{
|
| 16327 |
+
"epoch": 4.617063492063492,
|
| 16328 |
+
"grad_norm": 157.70257568359375,
|
| 16329 |
+
"learning_rate": 4.801980198019802e-06,
|
| 16330 |
+
"loss": 0.0846,
|
| 16331 |
+
"step": 2327
|
| 16332 |
+
},
|
| 16333 |
+
{
|
| 16334 |
+
"epoch": 4.619047619047619,
|
| 16335 |
+
"grad_norm": 185.60548400878906,
|
| 16336 |
+
"learning_rate": 4.777227722772277e-06,
|
| 16337 |
+
"loss": 0.1231,
|
| 16338 |
+
"step": 2328
|
| 16339 |
+
},
|
| 16340 |
+
{
|
| 16341 |
+
"epoch": 4.621031746031746,
|
| 16342 |
+
"grad_norm": 2144.7099609375,
|
| 16343 |
+
"learning_rate": 4.7524752475247525e-06,
|
| 16344 |
+
"loss": 0.13,
|
| 16345 |
+
"step": 2329
|
| 16346 |
+
},
|
| 16347 |
+
{
|
| 16348 |
+
"epoch": 4.6230158730158735,
|
| 16349 |
+
"grad_norm": 20.67472267150879,
|
| 16350 |
+
"learning_rate": 4.727722772277228e-06,
|
| 16351 |
+
"loss": 0.123,
|
| 16352 |
+
"step": 2330
|
| 16353 |
+
},
|
| 16354 |
+
{
|
| 16355 |
+
"epoch": 4.625,
|
| 16356 |
+
"grad_norm": 172.89031982421875,
|
| 16357 |
+
"learning_rate": 4.702970297029704e-06,
|
| 16358 |
+
"loss": 0.1076,
|
| 16359 |
+
"step": 2331
|
| 16360 |
+
},
|
| 16361 |
+
{
|
| 16362 |
+
"epoch": 4.6269841269841265,
|
| 16363 |
+
"grad_norm": 26.366361618041992,
|
| 16364 |
+
"learning_rate": 4.678217821782179e-06,
|
| 16365 |
+
"loss": 0.0835,
|
| 16366 |
+
"step": 2332
|
| 16367 |
+
},
|
| 16368 |
+
{
|
| 16369 |
+
"epoch": 4.628968253968254,
|
| 16370 |
+
"grad_norm": 476.0004577636719,
|
| 16371 |
+
"learning_rate": 4.653465346534654e-06,
|
| 16372 |
+
"loss": 0.1405,
|
| 16373 |
+
"step": 2333
|
| 16374 |
+
},
|
| 16375 |
+
{
|
| 16376 |
+
"epoch": 4.630952380952381,
|
| 16377 |
+
"grad_norm": 384.692626953125,
|
| 16378 |
+
"learning_rate": 4.628712871287129e-06,
|
| 16379 |
+
"loss": 0.1109,
|
| 16380 |
+
"step": 2334
|
| 16381 |
+
},
|
| 16382 |
+
{
|
| 16383 |
+
"epoch": 4.632936507936508,
|
| 16384 |
+
"grad_norm": 156.9302520751953,
|
| 16385 |
+
"learning_rate": 4.603960396039604e-06,
|
| 16386 |
+
"loss": 0.0907,
|
| 16387 |
+
"step": 2335
|
| 16388 |
+
},
|
| 16389 |
+
{
|
| 16390 |
+
"epoch": 4.634920634920634,
|
| 16391 |
+
"grad_norm": 168.79324340820312,
|
| 16392 |
+
"learning_rate": 4.579207920792079e-06,
|
| 16393 |
+
"loss": 0.1178,
|
| 16394 |
+
"step": 2336
|
| 16395 |
+
},
|
| 16396 |
+
{
|
| 16397 |
+
"epoch": 4.636904761904762,
|
| 16398 |
+
"grad_norm": 497.09912109375,
|
| 16399 |
+
"learning_rate": 4.554455445544554e-06,
|
| 16400 |
+
"loss": 0.0921,
|
| 16401 |
+
"step": 2337
|
| 16402 |
+
},
|
| 16403 |
+
{
|
| 16404 |
+
"epoch": 4.638888888888889,
|
| 16405 |
+
"grad_norm": 108.1347427368164,
|
| 16406 |
+
"learning_rate": 4.5297029702970295e-06,
|
| 16407 |
+
"loss": 0.1177,
|
| 16408 |
+
"step": 2338
|
| 16409 |
+
},
|
| 16410 |
+
{
|
| 16411 |
+
"epoch": 4.640873015873016,
|
| 16412 |
+
"grad_norm": 131.2079620361328,
|
| 16413 |
+
"learning_rate": 4.504950495049505e-06,
|
| 16414 |
+
"loss": 0.2638,
|
| 16415 |
+
"step": 2339
|
| 16416 |
+
},
|
| 16417 |
+
{
|
| 16418 |
+
"epoch": 4.642857142857143,
|
| 16419 |
+
"grad_norm": 58.77413558959961,
|
| 16420 |
+
"learning_rate": 4.4801980198019806e-06,
|
| 16421 |
+
"loss": 0.1001,
|
| 16422 |
+
"step": 2340
|
| 16423 |
+
},
|
| 16424 |
+
{
|
| 16425 |
+
"epoch": 4.64484126984127,
|
| 16426 |
+
"grad_norm": 235.69241333007812,
|
| 16427 |
+
"learning_rate": 4.455445544554456e-06,
|
| 16428 |
+
"loss": 0.1141,
|
| 16429 |
+
"step": 2341
|
| 16430 |
+
},
|
| 16431 |
+
{
|
| 16432 |
+
"epoch": 4.646825396825397,
|
| 16433 |
+
"grad_norm": 104.1804428100586,
|
| 16434 |
+
"learning_rate": 4.430693069306931e-06,
|
| 16435 |
+
"loss": 0.1196,
|
| 16436 |
+
"step": 2342
|
| 16437 |
+
},
|
| 16438 |
+
{
|
| 16439 |
+
"epoch": 4.648809523809524,
|
| 16440 |
+
"grad_norm": 16.738710403442383,
|
| 16441 |
+
"learning_rate": 4.405940594059406e-06,
|
| 16442 |
+
"loss": 0.1091,
|
| 16443 |
+
"step": 2343
|
| 16444 |
+
},
|
| 16445 |
+
{
|
| 16446 |
+
"epoch": 4.650793650793651,
|
| 16447 |
+
"grad_norm": 12.949176788330078,
|
| 16448 |
+
"learning_rate": 4.381188118811881e-06,
|
| 16449 |
+
"loss": 0.0961,
|
| 16450 |
+
"step": 2344
|
| 16451 |
+
},
|
| 16452 |
+
{
|
| 16453 |
+
"epoch": 4.652777777777778,
|
| 16454 |
+
"grad_norm": 76.58698272705078,
|
| 16455 |
+
"learning_rate": 4.356435643564356e-06,
|
| 16456 |
+
"loss": 0.1004,
|
| 16457 |
+
"step": 2345
|
| 16458 |
+
},
|
| 16459 |
+
{
|
| 16460 |
+
"epoch": 4.654761904761905,
|
| 16461 |
+
"grad_norm": 13140.046875,
|
| 16462 |
+
"learning_rate": 4.331683168316831e-06,
|
| 16463 |
+
"loss": 0.1272,
|
| 16464 |
+
"step": 2346
|
| 16465 |
+
},
|
| 16466 |
+
{
|
| 16467 |
+
"epoch": 4.656746031746032,
|
| 16468 |
+
"grad_norm": 58.4626579284668,
|
| 16469 |
+
"learning_rate": 4.306930693069306e-06,
|
| 16470 |
+
"loss": 0.1466,
|
| 16471 |
+
"step": 2347
|
| 16472 |
+
},
|
| 16473 |
+
{
|
| 16474 |
+
"epoch": 4.658730158730159,
|
| 16475 |
+
"grad_norm": 138.07513427734375,
|
| 16476 |
+
"learning_rate": 4.282178217821782e-06,
|
| 16477 |
+
"loss": 0.1238,
|
| 16478 |
+
"step": 2348
|
| 16479 |
+
},
|
| 16480 |
+
{
|
| 16481 |
+
"epoch": 4.660714285714286,
|
| 16482 |
+
"grad_norm": 7.002724647521973,
|
| 16483 |
+
"learning_rate": 4.257425742574258e-06,
|
| 16484 |
+
"loss": 0.0965,
|
| 16485 |
+
"step": 2349
|
| 16486 |
+
},
|
| 16487 |
+
{
|
| 16488 |
+
"epoch": 4.662698412698413,
|
| 16489 |
+
"grad_norm": 189.8155975341797,
|
| 16490 |
+
"learning_rate": 4.2326732673267335e-06,
|
| 16491 |
+
"loss": 0.1269,
|
| 16492 |
+
"step": 2350
|
| 16493 |
+
},
|
| 16494 |
+
{
|
| 16495 |
+
"epoch": 4.6646825396825395,
|
| 16496 |
+
"grad_norm": 112.2702407836914,
|
| 16497 |
+
"learning_rate": 4.207920792079209e-06,
|
| 16498 |
+
"loss": 0.125,
|
| 16499 |
+
"step": 2351
|
| 16500 |
+
},
|
| 16501 |
+
{
|
| 16502 |
+
"epoch": 4.666666666666667,
|
| 16503 |
+
"grad_norm": 58.758358001708984,
|
| 16504 |
+
"learning_rate": 4.183168316831684e-06,
|
| 16505 |
+
"loss": 0.1083,
|
| 16506 |
+
"step": 2352
|
| 16507 |
+
},
|
| 16508 |
+
{
|
| 16509 |
+
"epoch": 4.6686507936507935,
|
| 16510 |
+
"grad_norm": 17.230209350585938,
|
| 16511 |
+
"learning_rate": 4.158415841584159e-06,
|
| 16512 |
+
"loss": 0.0963,
|
| 16513 |
+
"step": 2353
|
| 16514 |
+
},
|
| 16515 |
+
{
|
| 16516 |
+
"epoch": 4.670634920634921,
|
| 16517 |
+
"grad_norm": 104.45642852783203,
|
| 16518 |
+
"learning_rate": 4.133663366336634e-06,
|
| 16519 |
+
"loss": 0.1134,
|
| 16520 |
+
"step": 2354
|
| 16521 |
+
},
|
| 16522 |
+
{
|
| 16523 |
+
"epoch": 4.6726190476190474,
|
| 16524 |
+
"grad_norm": 322.62237548828125,
|
| 16525 |
+
"learning_rate": 4.108910891089109e-06,
|
| 16526 |
+
"loss": 0.2663,
|
| 16527 |
+
"step": 2355
|
| 16528 |
+
},
|
| 16529 |
+
{
|
| 16530 |
+
"epoch": 4.674603174603175,
|
| 16531 |
+
"grad_norm": 322.0111083984375,
|
| 16532 |
+
"learning_rate": 4.084158415841584e-06,
|
| 16533 |
+
"loss": 0.1037,
|
| 16534 |
+
"step": 2356
|
| 16535 |
+
},
|
| 16536 |
+
{
|
| 16537 |
+
"epoch": 4.676587301587301,
|
| 16538 |
+
"grad_norm": 29.670015335083008,
|
| 16539 |
+
"learning_rate": 4.059405940594059e-06,
|
| 16540 |
+
"loss": 0.1354,
|
| 16541 |
+
"step": 2357
|
| 16542 |
+
},
|
| 16543 |
+
{
|
| 16544 |
+
"epoch": 4.678571428571429,
|
| 16545 |
+
"grad_norm": 217.89886474609375,
|
| 16546 |
+
"learning_rate": 4.034653465346535e-06,
|
| 16547 |
+
"loss": 0.0934,
|
| 16548 |
+
"step": 2358
|
| 16549 |
+
},
|
| 16550 |
+
{
|
| 16551 |
+
"epoch": 4.680555555555555,
|
| 16552 |
+
"grad_norm": 17.31934356689453,
|
| 16553 |
+
"learning_rate": 4.0099009900990104e-06,
|
| 16554 |
+
"loss": 0.097,
|
| 16555 |
+
"step": 2359
|
| 16556 |
+
},
|
| 16557 |
+
{
|
| 16558 |
+
"epoch": 4.682539682539683,
|
| 16559 |
+
"grad_norm": 55.98748016357422,
|
| 16560 |
+
"learning_rate": 3.9851485148514856e-06,
|
| 16561 |
+
"loss": 0.1389,
|
| 16562 |
+
"step": 2360
|
| 16563 |
+
},
|
| 16564 |
+
{
|
| 16565 |
+
"epoch": 4.684523809523809,
|
| 16566 |
+
"grad_norm": 36.77737045288086,
|
| 16567 |
+
"learning_rate": 3.960396039603961e-06,
|
| 16568 |
+
"loss": 0.1137,
|
| 16569 |
+
"step": 2361
|
| 16570 |
+
},
|
| 16571 |
+
{
|
| 16572 |
+
"epoch": 4.686507936507937,
|
| 16573 |
+
"grad_norm": 47.89748764038086,
|
| 16574 |
+
"learning_rate": 3.935643564356436e-06,
|
| 16575 |
+
"loss": 0.1115,
|
| 16576 |
+
"step": 2362
|
| 16577 |
+
},
|
| 16578 |
+
{
|
| 16579 |
+
"epoch": 4.688492063492063,
|
| 16580 |
+
"grad_norm": 140.5443572998047,
|
| 16581 |
+
"learning_rate": 3.910891089108911e-06,
|
| 16582 |
+
"loss": 0.1121,
|
| 16583 |
+
"step": 2363
|
| 16584 |
+
},
|
| 16585 |
+
{
|
| 16586 |
+
"epoch": 4.690476190476191,
|
| 16587 |
+
"grad_norm": 163.18447875976562,
|
| 16588 |
+
"learning_rate": 3.886138613861386e-06,
|
| 16589 |
+
"loss": 0.1045,
|
| 16590 |
+
"step": 2364
|
| 16591 |
+
},
|
| 16592 |
+
{
|
| 16593 |
+
"epoch": 4.692460317460317,
|
| 16594 |
+
"grad_norm": 439.2802429199219,
|
| 16595 |
+
"learning_rate": 3.861386138613861e-06,
|
| 16596 |
+
"loss": 0.1224,
|
| 16597 |
+
"step": 2365
|
| 16598 |
+
},
|
| 16599 |
+
{
|
| 16600 |
+
"epoch": 4.694444444444445,
|
| 16601 |
+
"grad_norm": 98.07796478271484,
|
| 16602 |
+
"learning_rate": 3.836633663366337e-06,
|
| 16603 |
+
"loss": 0.1065,
|
| 16604 |
+
"step": 2366
|
| 16605 |
+
},
|
| 16606 |
+
{
|
| 16607 |
+
"epoch": 4.696428571428571,
|
| 16608 |
+
"grad_norm": 427.74456787109375,
|
| 16609 |
+
"learning_rate": 3.811881188118812e-06,
|
| 16610 |
+
"loss": 0.1139,
|
| 16611 |
+
"step": 2367
|
| 16612 |
+
},
|
| 16613 |
+
{
|
| 16614 |
+
"epoch": 4.698412698412699,
|
| 16615 |
+
"grad_norm": 248.5419158935547,
|
| 16616 |
+
"learning_rate": 3.7871287128712874e-06,
|
| 16617 |
+
"loss": 0.0787,
|
| 16618 |
+
"step": 2368
|
| 16619 |
+
},
|
| 16620 |
+
{
|
| 16621 |
+
"epoch": 4.700396825396825,
|
| 16622 |
+
"grad_norm": 29.97829818725586,
|
| 16623 |
+
"learning_rate": 3.7623762376237625e-06,
|
| 16624 |
+
"loss": 0.1144,
|
| 16625 |
+
"step": 2369
|
| 16626 |
+
},
|
| 16627 |
+
{
|
| 16628 |
+
"epoch": 4.7023809523809526,
|
| 16629 |
+
"grad_norm": 193.80911254882812,
|
| 16630 |
+
"learning_rate": 3.7376237623762377e-06,
|
| 16631 |
+
"loss": 0.1041,
|
| 16632 |
+
"step": 2370
|
| 16633 |
+
},
|
| 16634 |
+
{
|
| 16635 |
+
"epoch": 4.704365079365079,
|
| 16636 |
+
"grad_norm": 200.8865966796875,
|
| 16637 |
+
"learning_rate": 3.7128712871287128e-06,
|
| 16638 |
+
"loss": 0.1129,
|
| 16639 |
+
"step": 2371
|
| 16640 |
+
},
|
| 16641 |
+
{
|
| 16642 |
+
"epoch": 4.7063492063492065,
|
| 16643 |
+
"grad_norm": 25.79678726196289,
|
| 16644 |
+
"learning_rate": 3.688118811881188e-06,
|
| 16645 |
+
"loss": 0.1233,
|
| 16646 |
+
"step": 2372
|
| 16647 |
+
},
|
| 16648 |
+
{
|
| 16649 |
+
"epoch": 4.708333333333333,
|
| 16650 |
+
"grad_norm": 16.433944702148438,
|
| 16651 |
+
"learning_rate": 3.6633663366336635e-06,
|
| 16652 |
+
"loss": 0.1173,
|
| 16653 |
+
"step": 2373
|
| 16654 |
+
},
|
| 16655 |
+
{
|
| 16656 |
+
"epoch": 4.7103174603174605,
|
| 16657 |
+
"grad_norm": 78.91058349609375,
|
| 16658 |
+
"learning_rate": 3.6386138613861386e-06,
|
| 16659 |
+
"loss": 0.1239,
|
| 16660 |
+
"step": 2374
|
| 16661 |
+
},
|
| 16662 |
+
{
|
| 16663 |
+
"epoch": 4.712301587301587,
|
| 16664 |
+
"grad_norm": 68.58326721191406,
|
| 16665 |
+
"learning_rate": 3.6138613861386137e-06,
|
| 16666 |
+
"loss": 0.1109,
|
| 16667 |
+
"step": 2375
|
| 16668 |
+
},
|
| 16669 |
+
{
|
| 16670 |
+
"epoch": 4.714285714285714,
|
| 16671 |
+
"grad_norm": 99.71257781982422,
|
| 16672 |
+
"learning_rate": 3.589108910891089e-06,
|
| 16673 |
+
"loss": 0.1114,
|
| 16674 |
+
"step": 2376
|
| 16675 |
+
},
|
| 16676 |
+
{
|
| 16677 |
+
"epoch": 4.716269841269841,
|
| 16678 |
+
"grad_norm": 76.21195220947266,
|
| 16679 |
+
"learning_rate": 3.5643564356435644e-06,
|
| 16680 |
+
"loss": 0.0884,
|
| 16681 |
+
"step": 2377
|
| 16682 |
+
},
|
| 16683 |
+
{
|
| 16684 |
+
"epoch": 4.718253968253968,
|
| 16685 |
+
"grad_norm": 29.15667152404785,
|
| 16686 |
+
"learning_rate": 3.5396039603960395e-06,
|
| 16687 |
+
"loss": 0.1124,
|
| 16688 |
+
"step": 2378
|
| 16689 |
+
},
|
| 16690 |
+
{
|
| 16691 |
+
"epoch": 4.720238095238095,
|
| 16692 |
+
"grad_norm": 1397.8233642578125,
|
| 16693 |
+
"learning_rate": 3.5148514851485155e-06,
|
| 16694 |
+
"loss": 0.1026,
|
| 16695 |
+
"step": 2379
|
| 16696 |
+
},
|
| 16697 |
+
{
|
| 16698 |
+
"epoch": 4.722222222222222,
|
| 16699 |
+
"grad_norm": 35.59405517578125,
|
| 16700 |
+
"learning_rate": 3.4900990099009906e-06,
|
| 16701 |
+
"loss": 0.1216,
|
| 16702 |
+
"step": 2380
|
| 16703 |
+
},
|
| 16704 |
+
{
|
| 16705 |
+
"epoch": 4.724206349206349,
|
| 16706 |
+
"grad_norm": 24.641639709472656,
|
| 16707 |
+
"learning_rate": 3.4653465346534657e-06,
|
| 16708 |
+
"loss": 0.0917,
|
| 16709 |
+
"step": 2381
|
| 16710 |
+
},
|
| 16711 |
+
{
|
| 16712 |
+
"epoch": 4.726190476190476,
|
| 16713 |
+
"grad_norm": 1105.272216796875,
|
| 16714 |
+
"learning_rate": 3.440594059405941e-06,
|
| 16715 |
+
"loss": 0.0979,
|
| 16716 |
+
"step": 2382
|
| 16717 |
+
},
|
| 16718 |
+
{
|
| 16719 |
+
"epoch": 4.728174603174603,
|
| 16720 |
+
"grad_norm": 88.4856948852539,
|
| 16721 |
+
"learning_rate": 3.4158415841584164e-06,
|
| 16722 |
+
"loss": 0.0958,
|
| 16723 |
+
"step": 2383
|
| 16724 |
+
},
|
| 16725 |
+
{
|
| 16726 |
+
"epoch": 4.73015873015873,
|
| 16727 |
+
"grad_norm": 118.49017333984375,
|
| 16728 |
+
"learning_rate": 3.3910891089108915e-06,
|
| 16729 |
+
"loss": 0.1443,
|
| 16730 |
+
"step": 2384
|
| 16731 |
+
},
|
| 16732 |
+
{
|
| 16733 |
+
"epoch": 4.732142857142857,
|
| 16734 |
+
"grad_norm": 40.44411087036133,
|
| 16735 |
+
"learning_rate": 3.3663366336633666e-06,
|
| 16736 |
+
"loss": 0.0792,
|
| 16737 |
+
"step": 2385
|
| 16738 |
+
},
|
| 16739 |
+
{
|
| 16740 |
+
"epoch": 4.734126984126984,
|
| 16741 |
+
"grad_norm": 151.79107666015625,
|
| 16742 |
+
"learning_rate": 3.3415841584158418e-06,
|
| 16743 |
+
"loss": 0.1288,
|
| 16744 |
+
"step": 2386
|
| 16745 |
+
},
|
| 16746 |
+
{
|
| 16747 |
+
"epoch": 4.736111111111111,
|
| 16748 |
+
"grad_norm": 94.86109161376953,
|
| 16749 |
+
"learning_rate": 3.3168316831683173e-06,
|
| 16750 |
+
"loss": 0.1244,
|
| 16751 |
+
"step": 2387
|
| 16752 |
+
},
|
| 16753 |
+
{
|
| 16754 |
+
"epoch": 4.738095238095238,
|
| 16755 |
+
"grad_norm": 142.2732696533203,
|
| 16756 |
+
"learning_rate": 3.2920792079207924e-06,
|
| 16757 |
+
"loss": 0.1124,
|
| 16758 |
+
"step": 2388
|
| 16759 |
+
},
|
| 16760 |
+
{
|
| 16761 |
+
"epoch": 4.740079365079366,
|
| 16762 |
+
"grad_norm": 152.5814666748047,
|
| 16763 |
+
"learning_rate": 3.2673267326732676e-06,
|
| 16764 |
+
"loss": 0.1181,
|
| 16765 |
+
"step": 2389
|
| 16766 |
+
},
|
| 16767 |
+
{
|
| 16768 |
+
"epoch": 4.742063492063492,
|
| 16769 |
+
"grad_norm": 0.970557451248169,
|
| 16770 |
+
"learning_rate": 3.2425742574257427e-06,
|
| 16771 |
+
"loss": 0.1047,
|
| 16772 |
+
"step": 2390
|
| 16773 |
+
},
|
| 16774 |
+
{
|
| 16775 |
+
"epoch": 4.744047619047619,
|
| 16776 |
+
"grad_norm": 122.67098999023438,
|
| 16777 |
+
"learning_rate": 3.2178217821782182e-06,
|
| 16778 |
+
"loss": 0.163,
|
| 16779 |
+
"step": 2391
|
| 16780 |
+
},
|
| 16781 |
+
{
|
| 16782 |
+
"epoch": 4.746031746031746,
|
| 16783 |
+
"grad_norm": 27.92265510559082,
|
| 16784 |
+
"learning_rate": 3.1930693069306933e-06,
|
| 16785 |
+
"loss": 0.1044,
|
| 16786 |
+
"step": 2392
|
| 16787 |
+
},
|
| 16788 |
+
{
|
| 16789 |
+
"epoch": 4.7480158730158735,
|
| 16790 |
+
"grad_norm": 2.400805711746216,
|
| 16791 |
+
"learning_rate": 3.1683168316831685e-06,
|
| 16792 |
+
"loss": 0.1963,
|
| 16793 |
+
"step": 2393
|
| 16794 |
+
},
|
| 16795 |
+
{
|
| 16796 |
+
"epoch": 4.75,
|
| 16797 |
+
"grad_norm": 95.16465759277344,
|
| 16798 |
+
"learning_rate": 3.1435643564356436e-06,
|
| 16799 |
+
"loss": 0.102,
|
| 16800 |
+
"step": 2394
|
| 16801 |
+
},
|
| 16802 |
+
{
|
| 16803 |
+
"epoch": 4.7519841269841265,
|
| 16804 |
+
"grad_norm": 3563.260986328125,
|
| 16805 |
+
"learning_rate": 3.118811881188119e-06,
|
| 16806 |
+
"loss": 0.112,
|
| 16807 |
+
"step": 2395
|
| 16808 |
+
},
|
| 16809 |
+
{
|
| 16810 |
+
"epoch": 4.753968253968254,
|
| 16811 |
+
"grad_norm": 33.85247039794922,
|
| 16812 |
+
"learning_rate": 3.0940594059405943e-06,
|
| 16813 |
+
"loss": 0.0971,
|
| 16814 |
+
"step": 2396
|
| 16815 |
+
},
|
| 16816 |
+
{
|
| 16817 |
+
"epoch": 4.755952380952381,
|
| 16818 |
+
"grad_norm": 31.06768798828125,
|
| 16819 |
+
"learning_rate": 3.0693069306930694e-06,
|
| 16820 |
+
"loss": 0.0852,
|
| 16821 |
+
"step": 2397
|
| 16822 |
+
},
|
| 16823 |
+
{
|
| 16824 |
+
"epoch": 4.757936507936508,
|
| 16825 |
+
"grad_norm": 123.3304672241211,
|
| 16826 |
+
"learning_rate": 3.0445544554455445e-06,
|
| 16827 |
+
"loss": 0.1006,
|
| 16828 |
+
"step": 2398
|
| 16829 |
+
},
|
| 16830 |
+
{
|
| 16831 |
+
"epoch": 4.759920634920634,
|
| 16832 |
+
"grad_norm": 32.31071472167969,
|
| 16833 |
+
"learning_rate": 3.01980198019802e-06,
|
| 16834 |
+
"loss": 0.1079,
|
| 16835 |
+
"step": 2399
|
| 16836 |
+
},
|
| 16837 |
+
{
|
| 16838 |
+
"epoch": 4.761904761904762,
|
| 16839 |
+
"grad_norm": 83.55545043945312,
|
| 16840 |
+
"learning_rate": 2.995049504950495e-06,
|
| 16841 |
+
"loss": 0.1063,
|
| 16842 |
+
"step": 2400
|
| 16843 |
+
},
|
| 16844 |
+
{
|
| 16845 |
+
"epoch": 4.763888888888889,
|
| 16846 |
+
"grad_norm": 47.01555252075195,
|
| 16847 |
+
"learning_rate": 2.9702970297029703e-06,
|
| 16848 |
+
"loss": 0.112,
|
| 16849 |
+
"step": 2401
|
| 16850 |
+
},
|
| 16851 |
+
{
|
| 16852 |
+
"epoch": 4.765873015873016,
|
| 16853 |
+
"grad_norm": 31.080564498901367,
|
| 16854 |
+
"learning_rate": 2.9455445544554454e-06,
|
| 16855 |
+
"loss": 0.1611,
|
| 16856 |
+
"step": 2402
|
| 16857 |
+
},
|
| 16858 |
+
{
|
| 16859 |
+
"epoch": 4.767857142857143,
|
| 16860 |
+
"grad_norm": 4.679503440856934,
|
| 16861 |
+
"learning_rate": 2.920792079207921e-06,
|
| 16862 |
+
"loss": 0.1109,
|
| 16863 |
+
"step": 2403
|
| 16864 |
+
},
|
| 16865 |
+
{
|
| 16866 |
+
"epoch": 4.76984126984127,
|
| 16867 |
+
"grad_norm": 43.96272659301758,
|
| 16868 |
+
"learning_rate": 2.8960396039603965e-06,
|
| 16869 |
+
"loss": 0.1263,
|
| 16870 |
+
"step": 2404
|
| 16871 |
+
},
|
| 16872 |
+
{
|
| 16873 |
+
"epoch": 4.771825396825397,
|
| 16874 |
+
"grad_norm": 64.4210205078125,
|
| 16875 |
+
"learning_rate": 2.8712871287128717e-06,
|
| 16876 |
+
"loss": 0.1255,
|
| 16877 |
+
"step": 2405
|
| 16878 |
+
},
|
| 16879 |
+
{
|
| 16880 |
+
"epoch": 4.773809523809524,
|
| 16881 |
+
"grad_norm": 69.04649353027344,
|
| 16882 |
+
"learning_rate": 2.8465346534653468e-06,
|
| 16883 |
+
"loss": 0.1162,
|
| 16884 |
+
"step": 2406
|
| 16885 |
+
},
|
| 16886 |
+
{
|
| 16887 |
+
"epoch": 4.775793650793651,
|
| 16888 |
+
"grad_norm": 42.12254333496094,
|
| 16889 |
+
"learning_rate": 2.821782178217822e-06,
|
| 16890 |
+
"loss": 0.1357,
|
| 16891 |
+
"step": 2407
|
| 16892 |
+
},
|
| 16893 |
+
{
|
| 16894 |
+
"epoch": 4.777777777777778,
|
| 16895 |
+
"grad_norm": 62.59967041015625,
|
| 16896 |
+
"learning_rate": 2.797029702970297e-06,
|
| 16897 |
+
"loss": 0.125,
|
| 16898 |
+
"step": 2408
|
| 16899 |
+
},
|
| 16900 |
+
{
|
| 16901 |
+
"epoch": 4.779761904761905,
|
| 16902 |
+
"grad_norm": 140.9676971435547,
|
| 16903 |
+
"learning_rate": 2.7722772277227726e-06,
|
| 16904 |
+
"loss": 0.1175,
|
| 16905 |
+
"step": 2409
|
| 16906 |
+
},
|
| 16907 |
+
{
|
| 16908 |
+
"epoch": 4.781746031746032,
|
| 16909 |
+
"grad_norm": 202.58731079101562,
|
| 16910 |
+
"learning_rate": 2.7475247524752477e-06,
|
| 16911 |
+
"loss": 0.1368,
|
| 16912 |
+
"step": 2410
|
| 16913 |
+
},
|
| 16914 |
+
{
|
| 16915 |
+
"epoch": 4.783730158730159,
|
| 16916 |
+
"grad_norm": 8.134492874145508,
|
| 16917 |
+
"learning_rate": 2.722772277227723e-06,
|
| 16918 |
+
"loss": 0.1036,
|
| 16919 |
+
"step": 2411
|
| 16920 |
+
},
|
| 16921 |
+
{
|
| 16922 |
+
"epoch": 4.785714285714286,
|
| 16923 |
+
"grad_norm": 306.3316345214844,
|
| 16924 |
+
"learning_rate": 2.698019801980198e-06,
|
| 16925 |
+
"loss": 0.1087,
|
| 16926 |
+
"step": 2412
|
| 16927 |
+
},
|
| 16928 |
+
{
|
| 16929 |
+
"epoch": 4.787698412698413,
|
| 16930 |
+
"grad_norm": 245.6614990234375,
|
| 16931 |
+
"learning_rate": 2.6732673267326735e-06,
|
| 16932 |
+
"loss": 0.1419,
|
| 16933 |
+
"step": 2413
|
| 16934 |
+
},
|
| 16935 |
+
{
|
| 16936 |
+
"epoch": 4.7896825396825395,
|
| 16937 |
+
"grad_norm": 92.39546203613281,
|
| 16938 |
+
"learning_rate": 2.6485148514851486e-06,
|
| 16939 |
+
"loss": 0.1356,
|
| 16940 |
+
"step": 2414
|
| 16941 |
+
},
|
| 16942 |
+
{
|
| 16943 |
+
"epoch": 4.791666666666667,
|
| 16944 |
+
"grad_norm": 77.70650482177734,
|
| 16945 |
+
"learning_rate": 2.6237623762376237e-06,
|
| 16946 |
+
"loss": 0.1296,
|
| 16947 |
+
"step": 2415
|
| 16948 |
+
},
|
| 16949 |
+
{
|
| 16950 |
+
"epoch": 4.7936507936507935,
|
| 16951 |
+
"grad_norm": 810.5091552734375,
|
| 16952 |
+
"learning_rate": 2.599009900990099e-06,
|
| 16953 |
+
"loss": 0.1205,
|
| 16954 |
+
"step": 2416
|
| 16955 |
+
},
|
| 16956 |
+
{
|
| 16957 |
+
"epoch": 4.795634920634921,
|
| 16958 |
+
"grad_norm": 97.3177719116211,
|
| 16959 |
+
"learning_rate": 2.5742574257425744e-06,
|
| 16960 |
+
"loss": 0.0847,
|
| 16961 |
+
"step": 2417
|
| 16962 |
+
},
|
| 16963 |
+
{
|
| 16964 |
+
"epoch": 4.7976190476190474,
|
| 16965 |
+
"grad_norm": 307.71124267578125,
|
| 16966 |
+
"learning_rate": 2.5495049504950495e-06,
|
| 16967 |
+
"loss": 0.0836,
|
| 16968 |
+
"step": 2418
|
| 16969 |
+
},
|
| 16970 |
+
{
|
| 16971 |
+
"epoch": 4.799603174603175,
|
| 16972 |
+
"grad_norm": 415.3011779785156,
|
| 16973 |
+
"learning_rate": 2.524752475247525e-06,
|
| 16974 |
+
"loss": 0.1633,
|
| 16975 |
+
"step": 2419
|
| 16976 |
+
},
|
| 16977 |
+
{
|
| 16978 |
+
"epoch": 4.801587301587301,
|
| 16979 |
+
"grad_norm": 9341.1962890625,
|
| 16980 |
+
"learning_rate": 2.5e-06,
|
| 16981 |
+
"loss": 0.1183,
|
| 16982 |
+
"step": 2420
|
| 16983 |
+
},
|
| 16984 |
+
{
|
| 16985 |
+
"epoch": 4.803571428571429,
|
| 16986 |
+
"grad_norm": 120.80254364013672,
|
| 16987 |
+
"learning_rate": 2.4752475247524753e-06,
|
| 16988 |
+
"loss": 0.1151,
|
| 16989 |
+
"step": 2421
|
| 16990 |
+
},
|
| 16991 |
+
{
|
| 16992 |
+
"epoch": 4.805555555555555,
|
| 16993 |
+
"grad_norm": 1246.0142822265625,
|
| 16994 |
+
"learning_rate": 2.450495049504951e-06,
|
| 16995 |
+
"loss": 0.1151,
|
| 16996 |
+
"step": 2422
|
| 16997 |
+
},
|
| 16998 |
+
{
|
| 16999 |
+
"epoch": 4.807539682539683,
|
| 17000 |
+
"grad_norm": 43.499595642089844,
|
| 17001 |
+
"learning_rate": 2.425742574257426e-06,
|
| 17002 |
+
"loss": 0.113,
|
| 17003 |
+
"step": 2423
|
| 17004 |
+
},
|
| 17005 |
+
{
|
| 17006 |
+
"epoch": 4.809523809523809,
|
| 17007 |
+
"grad_norm": 81.7345199584961,
|
| 17008 |
+
"learning_rate": 2.400990099009901e-06,
|
| 17009 |
+
"loss": 0.0984,
|
| 17010 |
+
"step": 2424
|
| 17011 |
+
},
|
| 17012 |
+
{
|
| 17013 |
+
"epoch": 4.811507936507937,
|
| 17014 |
+
"grad_norm": 63.05341339111328,
|
| 17015 |
+
"learning_rate": 2.3762376237623762e-06,
|
| 17016 |
+
"loss": 0.128,
|
| 17017 |
+
"step": 2425
|
| 17018 |
+
},
|
| 17019 |
+
{
|
| 17020 |
+
"epoch": 4.813492063492063,
|
| 17021 |
+
"grad_norm": 178.39157104492188,
|
| 17022 |
+
"learning_rate": 2.351485148514852e-06,
|
| 17023 |
+
"loss": 0.1231,
|
| 17024 |
+
"step": 2426
|
| 17025 |
+
},
|
| 17026 |
+
{
|
| 17027 |
+
"epoch": 4.815476190476191,
|
| 17028 |
+
"grad_norm": 15.4378023147583,
|
| 17029 |
+
"learning_rate": 2.326732673267327e-06,
|
| 17030 |
+
"loss": 0.1214,
|
| 17031 |
+
"step": 2427
|
| 17032 |
+
},
|
| 17033 |
+
{
|
| 17034 |
+
"epoch": 4.817460317460317,
|
| 17035 |
+
"grad_norm": 89.79084014892578,
|
| 17036 |
+
"learning_rate": 2.301980198019802e-06,
|
| 17037 |
+
"loss": 0.098,
|
| 17038 |
+
"step": 2428
|
| 17039 |
+
},
|
| 17040 |
+
{
|
| 17041 |
+
"epoch": 4.819444444444445,
|
| 17042 |
+
"grad_norm": 40.045284271240234,
|
| 17043 |
+
"learning_rate": 2.277227722772277e-06,
|
| 17044 |
+
"loss": 0.1048,
|
| 17045 |
+
"step": 2429
|
| 17046 |
+
},
|
| 17047 |
+
{
|
| 17048 |
+
"epoch": 4.821428571428571,
|
| 17049 |
+
"grad_norm": 210.37075805664062,
|
| 17050 |
+
"learning_rate": 2.2524752475247523e-06,
|
| 17051 |
+
"loss": 0.1422,
|
| 17052 |
+
"step": 2430
|
| 17053 |
+
},
|
| 17054 |
+
{
|
| 17055 |
+
"epoch": 4.823412698412699,
|
| 17056 |
+
"grad_norm": 259.76800537109375,
|
| 17057 |
+
"learning_rate": 2.227722772277228e-06,
|
| 17058 |
+
"loss": 0.1622,
|
| 17059 |
+
"step": 2431
|
| 17060 |
+
},
|
| 17061 |
+
{
|
| 17062 |
+
"epoch": 4.825396825396825,
|
| 17063 |
+
"grad_norm": 719.5950927734375,
|
| 17064 |
+
"learning_rate": 2.202970297029703e-06,
|
| 17065 |
+
"loss": 0.1006,
|
| 17066 |
+
"step": 2432
|
| 17067 |
+
},
|
| 17068 |
+
{
|
| 17069 |
+
"epoch": 4.8273809523809526,
|
| 17070 |
+
"grad_norm": 41.61597442626953,
|
| 17071 |
+
"learning_rate": 2.178217821782178e-06,
|
| 17072 |
+
"loss": 0.1025,
|
| 17073 |
+
"step": 2433
|
| 17074 |
+
},
|
| 17075 |
+
{
|
| 17076 |
+
"epoch": 4.829365079365079,
|
| 17077 |
+
"grad_norm": 167.94064331054688,
|
| 17078 |
+
"learning_rate": 2.153465346534653e-06,
|
| 17079 |
+
"loss": 0.1192,
|
| 17080 |
+
"step": 2434
|
| 17081 |
+
},
|
| 17082 |
+
{
|
| 17083 |
+
"epoch": 4.8313492063492065,
|
| 17084 |
+
"grad_norm": 1380.93701171875,
|
| 17085 |
+
"learning_rate": 2.128712871287129e-06,
|
| 17086 |
+
"loss": 0.1052,
|
| 17087 |
+
"step": 2435
|
| 17088 |
+
},
|
| 17089 |
+
{
|
| 17090 |
+
"epoch": 4.833333333333333,
|
| 17091 |
+
"grad_norm": 21.072771072387695,
|
| 17092 |
+
"learning_rate": 2.1039603960396043e-06,
|
| 17093 |
+
"loss": 0.1089,
|
| 17094 |
+
"step": 2436
|
| 17095 |
+
},
|
| 17096 |
+
{
|
| 17097 |
+
"epoch": 4.8353174603174605,
|
| 17098 |
+
"grad_norm": 252.01800537109375,
|
| 17099 |
+
"learning_rate": 2.0792079207920794e-06,
|
| 17100 |
+
"loss": 0.0922,
|
| 17101 |
+
"step": 2437
|
| 17102 |
+
},
|
| 17103 |
+
{
|
| 17104 |
+
"epoch": 4.837301587301587,
|
| 17105 |
+
"grad_norm": 35.98377990722656,
|
| 17106 |
+
"learning_rate": 2.0544554455445546e-06,
|
| 17107 |
+
"loss": 0.1014,
|
| 17108 |
+
"step": 2438
|
| 17109 |
+
},
|
| 17110 |
+
{
|
| 17111 |
+
"epoch": 4.839285714285714,
|
| 17112 |
+
"grad_norm": 40.20614242553711,
|
| 17113 |
+
"learning_rate": 2.0297029702970297e-06,
|
| 17114 |
+
"loss": 0.0948,
|
| 17115 |
+
"step": 2439
|
| 17116 |
+
},
|
| 17117 |
+
{
|
| 17118 |
+
"epoch": 4.841269841269841,
|
| 17119 |
+
"grad_norm": 301.1222839355469,
|
| 17120 |
+
"learning_rate": 2.0049504950495052e-06,
|
| 17121 |
+
"loss": 0.1044,
|
| 17122 |
+
"step": 2440
|
| 17123 |
+
},
|
| 17124 |
+
{
|
| 17125 |
+
"epoch": 4.843253968253968,
|
| 17126 |
+
"grad_norm": 75.2447509765625,
|
| 17127 |
+
"learning_rate": 1.9801980198019803e-06,
|
| 17128 |
+
"loss": 0.1149,
|
| 17129 |
+
"step": 2441
|
| 17130 |
+
},
|
| 17131 |
+
{
|
| 17132 |
+
"epoch": 4.845238095238095,
|
| 17133 |
+
"grad_norm": 54.770572662353516,
|
| 17134 |
+
"learning_rate": 1.9554455445544555e-06,
|
| 17135 |
+
"loss": 0.1348,
|
| 17136 |
+
"step": 2442
|
| 17137 |
+
},
|
| 17138 |
+
{
|
| 17139 |
+
"epoch": 4.847222222222222,
|
| 17140 |
+
"grad_norm": 1316.5123291015625,
|
| 17141 |
+
"learning_rate": 1.9306930693069306e-06,
|
| 17142 |
+
"loss": 0.0955,
|
| 17143 |
+
"step": 2443
|
| 17144 |
+
},
|
| 17145 |
+
{
|
| 17146 |
+
"epoch": 4.849206349206349,
|
| 17147 |
+
"grad_norm": 36.49045181274414,
|
| 17148 |
+
"learning_rate": 1.905940594059406e-06,
|
| 17149 |
+
"loss": 0.1112,
|
| 17150 |
+
"step": 2444
|
| 17151 |
+
},
|
| 17152 |
+
{
|
| 17153 |
+
"epoch": 4.851190476190476,
|
| 17154 |
+
"grad_norm": 164.73707580566406,
|
| 17155 |
+
"learning_rate": 1.8811881188118813e-06,
|
| 17156 |
+
"loss": 0.1167,
|
| 17157 |
+
"step": 2445
|
| 17158 |
+
},
|
| 17159 |
+
{
|
| 17160 |
+
"epoch": 4.853174603174603,
|
| 17161 |
+
"grad_norm": 103.83766174316406,
|
| 17162 |
+
"learning_rate": 1.8564356435643564e-06,
|
| 17163 |
+
"loss": 0.091,
|
| 17164 |
+
"step": 2446
|
| 17165 |
+
},
|
| 17166 |
+
{
|
| 17167 |
+
"epoch": 4.85515873015873,
|
| 17168 |
+
"grad_norm": 0.4168950915336609,
|
| 17169 |
+
"learning_rate": 1.8316831683168317e-06,
|
| 17170 |
+
"loss": 0.0961,
|
| 17171 |
+
"step": 2447
|
| 17172 |
+
},
|
| 17173 |
+
{
|
| 17174 |
+
"epoch": 4.857142857142857,
|
| 17175 |
+
"grad_norm": 25.705242156982422,
|
| 17176 |
+
"learning_rate": 1.8069306930693068e-06,
|
| 17177 |
+
"loss": 0.1023,
|
| 17178 |
+
"step": 2448
|
| 17179 |
+
},
|
| 17180 |
+
{
|
| 17181 |
+
"epoch": 4.859126984126984,
|
| 17182 |
+
"grad_norm": 334.29144287109375,
|
| 17183 |
+
"learning_rate": 1.7821782178217822e-06,
|
| 17184 |
+
"loss": 0.1213,
|
| 17185 |
+
"step": 2449
|
| 17186 |
+
},
|
| 17187 |
+
{
|
| 17188 |
+
"epoch": 4.861111111111111,
|
| 17189 |
+
"grad_norm": 335.0593566894531,
|
| 17190 |
+
"learning_rate": 1.7574257425742577e-06,
|
| 17191 |
+
"loss": 0.1569,
|
| 17192 |
+
"step": 2450
|
| 17193 |
+
},
|
| 17194 |
+
{
|
| 17195 |
+
"epoch": 4.863095238095238,
|
| 17196 |
+
"grad_norm": 43.26740646362305,
|
| 17197 |
+
"learning_rate": 1.7326732673267329e-06,
|
| 17198 |
+
"loss": 0.1075,
|
| 17199 |
+
"step": 2451
|
| 17200 |
+
},
|
| 17201 |
+
{
|
| 17202 |
+
"epoch": 4.865079365079366,
|
| 17203 |
+
"grad_norm": 38.5167236328125,
|
| 17204 |
+
"learning_rate": 1.7079207920792082e-06,
|
| 17205 |
+
"loss": 0.0999,
|
| 17206 |
+
"step": 2452
|
| 17207 |
+
},
|
| 17208 |
+
{
|
| 17209 |
+
"epoch": 4.867063492063492,
|
| 17210 |
+
"grad_norm": 247.79600524902344,
|
| 17211 |
+
"learning_rate": 1.6831683168316833e-06,
|
| 17212 |
+
"loss": 0.1168,
|
| 17213 |
+
"step": 2453
|
| 17214 |
+
},
|
| 17215 |
+
{
|
| 17216 |
+
"epoch": 4.869047619047619,
|
| 17217 |
+
"grad_norm": 16.682174682617188,
|
| 17218 |
+
"learning_rate": 1.6584158415841587e-06,
|
| 17219 |
+
"loss": 0.0888,
|
| 17220 |
+
"step": 2454
|
| 17221 |
+
},
|
| 17222 |
+
{
|
| 17223 |
+
"epoch": 4.871031746031746,
|
| 17224 |
+
"grad_norm": 123.03621673583984,
|
| 17225 |
+
"learning_rate": 1.6336633663366338e-06,
|
| 17226 |
+
"loss": 0.1081,
|
| 17227 |
+
"step": 2455
|
| 17228 |
+
},
|
| 17229 |
+
{
|
| 17230 |
+
"epoch": 4.8730158730158735,
|
| 17231 |
+
"grad_norm": 13.761883735656738,
|
| 17232 |
+
"learning_rate": 1.6089108910891091e-06,
|
| 17233 |
+
"loss": 0.1053,
|
| 17234 |
+
"step": 2456
|
| 17235 |
+
},
|
| 17236 |
+
{
|
| 17237 |
+
"epoch": 4.875,
|
| 17238 |
+
"grad_norm": 1165.3431396484375,
|
| 17239 |
+
"learning_rate": 1.5841584158415842e-06,
|
| 17240 |
+
"loss": 0.1137,
|
| 17241 |
+
"step": 2457
|
| 17242 |
+
},
|
| 17243 |
+
{
|
| 17244 |
+
"epoch": 4.8769841269841265,
|
| 17245 |
+
"grad_norm": 6.198860168457031,
|
| 17246 |
+
"learning_rate": 1.5594059405940596e-06,
|
| 17247 |
+
"loss": 0.1091,
|
| 17248 |
+
"step": 2458
|
| 17249 |
+
},
|
| 17250 |
+
{
|
| 17251 |
+
"epoch": 4.878968253968254,
|
| 17252 |
+
"grad_norm": 177.04393005371094,
|
| 17253 |
+
"learning_rate": 1.5346534653465347e-06,
|
| 17254 |
+
"loss": 0.1243,
|
| 17255 |
+
"step": 2459
|
| 17256 |
+
},
|
| 17257 |
+
{
|
| 17258 |
+
"epoch": 4.880952380952381,
|
| 17259 |
+
"grad_norm": 65.993896484375,
|
| 17260 |
+
"learning_rate": 1.50990099009901e-06,
|
| 17261 |
+
"loss": 0.1178,
|
| 17262 |
+
"step": 2460
|
| 17263 |
+
},
|
| 17264 |
+
{
|
| 17265 |
+
"epoch": 4.882936507936508,
|
| 17266 |
+
"grad_norm": 367.6841125488281,
|
| 17267 |
+
"learning_rate": 1.4851485148514852e-06,
|
| 17268 |
+
"loss": 0.0882,
|
| 17269 |
+
"step": 2461
|
| 17270 |
+
},
|
| 17271 |
+
{
|
| 17272 |
+
"epoch": 4.884920634920634,
|
| 17273 |
+
"grad_norm": 65.00841522216797,
|
| 17274 |
+
"learning_rate": 1.4603960396039605e-06,
|
| 17275 |
+
"loss": 0.1168,
|
| 17276 |
+
"step": 2462
|
| 17277 |
+
},
|
| 17278 |
+
{
|
| 17279 |
+
"epoch": 4.886904761904762,
|
| 17280 |
+
"grad_norm": 417.6282958984375,
|
| 17281 |
+
"learning_rate": 1.4356435643564358e-06,
|
| 17282 |
+
"loss": 0.1318,
|
| 17283 |
+
"step": 2463
|
| 17284 |
+
},
|
| 17285 |
+
{
|
| 17286 |
+
"epoch": 4.888888888888889,
|
| 17287 |
+
"grad_norm": 93.27454376220703,
|
| 17288 |
+
"learning_rate": 1.410891089108911e-06,
|
| 17289 |
+
"loss": 0.1143,
|
| 17290 |
+
"step": 2464
|
| 17291 |
+
},
|
| 17292 |
+
{
|
| 17293 |
+
"epoch": 4.890873015873016,
|
| 17294 |
+
"grad_norm": 131.75108337402344,
|
| 17295 |
+
"learning_rate": 1.3861386138613863e-06,
|
| 17296 |
+
"loss": 0.1247,
|
| 17297 |
+
"step": 2465
|
| 17298 |
+
},
|
| 17299 |
+
{
|
| 17300 |
+
"epoch": 4.892857142857143,
|
| 17301 |
+
"grad_norm": 115.73747253417969,
|
| 17302 |
+
"learning_rate": 1.3613861386138614e-06,
|
| 17303 |
+
"loss": 0.1087,
|
| 17304 |
+
"step": 2466
|
| 17305 |
+
},
|
| 17306 |
+
{
|
| 17307 |
+
"epoch": 4.89484126984127,
|
| 17308 |
+
"grad_norm": 609.8623046875,
|
| 17309 |
+
"learning_rate": 1.3366336633663367e-06,
|
| 17310 |
+
"loss": 0.1469,
|
| 17311 |
+
"step": 2467
|
| 17312 |
+
},
|
| 17313 |
+
{
|
| 17314 |
+
"epoch": 4.896825396825397,
|
| 17315 |
+
"grad_norm": 1871.4293212890625,
|
| 17316 |
+
"learning_rate": 1.3118811881188119e-06,
|
| 17317 |
+
"loss": 0.1097,
|
| 17318 |
+
"step": 2468
|
| 17319 |
+
},
|
| 17320 |
+
{
|
| 17321 |
+
"epoch": 4.898809523809524,
|
| 17322 |
+
"grad_norm": 66.50846099853516,
|
| 17323 |
+
"learning_rate": 1.2871287128712872e-06,
|
| 17324 |
+
"loss": 0.1119,
|
| 17325 |
+
"step": 2469
|
| 17326 |
+
},
|
| 17327 |
+
{
|
| 17328 |
+
"epoch": 4.900793650793651,
|
| 17329 |
+
"grad_norm": 30.7410945892334,
|
| 17330 |
+
"learning_rate": 1.2623762376237625e-06,
|
| 17331 |
+
"loss": 0.092,
|
| 17332 |
+
"step": 2470
|
| 17333 |
+
},
|
| 17334 |
+
{
|
| 17335 |
+
"epoch": 4.902777777777778,
|
| 17336 |
+
"grad_norm": 187.96954345703125,
|
| 17337 |
+
"learning_rate": 1.2376237623762377e-06,
|
| 17338 |
+
"loss": 0.1311,
|
| 17339 |
+
"step": 2471
|
| 17340 |
+
},
|
| 17341 |
+
{
|
| 17342 |
+
"epoch": 4.904761904761905,
|
| 17343 |
+
"grad_norm": 85.9355239868164,
|
| 17344 |
+
"learning_rate": 1.212871287128713e-06,
|
| 17345 |
+
"loss": 0.1486,
|
| 17346 |
+
"step": 2472
|
| 17347 |
+
},
|
| 17348 |
+
{
|
| 17349 |
+
"epoch": 4.906746031746032,
|
| 17350 |
+
"grad_norm": 27.18872833251953,
|
| 17351 |
+
"learning_rate": 1.1881188118811881e-06,
|
| 17352 |
+
"loss": 0.1618,
|
| 17353 |
+
"step": 2473
|
| 17354 |
+
},
|
| 17355 |
+
{
|
| 17356 |
+
"epoch": 4.908730158730159,
|
| 17357 |
+
"grad_norm": 67.06446075439453,
|
| 17358 |
+
"learning_rate": 1.1633663366336635e-06,
|
| 17359 |
+
"loss": 0.1523,
|
| 17360 |
+
"step": 2474
|
| 17361 |
+
},
|
| 17362 |
+
{
|
| 17363 |
+
"epoch": 4.910714285714286,
|
| 17364 |
+
"grad_norm": 167.47836303710938,
|
| 17365 |
+
"learning_rate": 1.1386138613861386e-06,
|
| 17366 |
+
"loss": 0.2945,
|
| 17367 |
+
"step": 2475
|
| 17368 |
+
},
|
| 17369 |
+
{
|
| 17370 |
+
"epoch": 4.912698412698413,
|
| 17371 |
+
"grad_norm": 338.9327697753906,
|
| 17372 |
+
"learning_rate": 1.113861386138614e-06,
|
| 17373 |
+
"loss": 0.1143,
|
| 17374 |
+
"step": 2476
|
| 17375 |
+
},
|
| 17376 |
+
{
|
| 17377 |
+
"epoch": 4.9146825396825395,
|
| 17378 |
+
"grad_norm": 128.0420684814453,
|
| 17379 |
+
"learning_rate": 1.089108910891089e-06,
|
| 17380 |
+
"loss": 0.0932,
|
| 17381 |
+
"step": 2477
|
| 17382 |
+
},
|
| 17383 |
+
{
|
| 17384 |
+
"epoch": 4.916666666666667,
|
| 17385 |
+
"grad_norm": 129.2307586669922,
|
| 17386 |
+
"learning_rate": 1.0643564356435646e-06,
|
| 17387 |
+
"loss": 0.1715,
|
| 17388 |
+
"step": 2478
|
| 17389 |
+
},
|
| 17390 |
+
{
|
| 17391 |
+
"epoch": 4.9186507936507935,
|
| 17392 |
+
"grad_norm": 104.76609802246094,
|
| 17393 |
+
"learning_rate": 1.0396039603960397e-06,
|
| 17394 |
+
"loss": 0.1205,
|
| 17395 |
+
"step": 2479
|
| 17396 |
+
},
|
| 17397 |
+
{
|
| 17398 |
+
"epoch": 4.920634920634921,
|
| 17399 |
+
"grad_norm": 434.3623962402344,
|
| 17400 |
+
"learning_rate": 1.0148514851485148e-06,
|
| 17401 |
+
"loss": 0.0985,
|
| 17402 |
+
"step": 2480
|
| 17403 |
+
},
|
| 17404 |
+
{
|
| 17405 |
+
"epoch": 4.9226190476190474,
|
| 17406 |
+
"grad_norm": 94.22132110595703,
|
| 17407 |
+
"learning_rate": 9.900990099009902e-07,
|
| 17408 |
+
"loss": 0.2376,
|
| 17409 |
+
"step": 2481
|
| 17410 |
+
},
|
| 17411 |
+
{
|
| 17412 |
+
"epoch": 4.924603174603175,
|
| 17413 |
+
"grad_norm": 199.75180053710938,
|
| 17414 |
+
"learning_rate": 9.653465346534653e-07,
|
| 17415 |
+
"loss": 0.2667,
|
| 17416 |
+
"step": 2482
|
| 17417 |
+
},
|
| 17418 |
+
{
|
| 17419 |
+
"epoch": 4.926587301587301,
|
| 17420 |
+
"grad_norm": 573.324951171875,
|
| 17421 |
+
"learning_rate": 9.405940594059406e-07,
|
| 17422 |
+
"loss": 0.1186,
|
| 17423 |
+
"step": 2483
|
| 17424 |
+
},
|
| 17425 |
+
{
|
| 17426 |
+
"epoch": 4.928571428571429,
|
| 17427 |
+
"grad_norm": 240.54193115234375,
|
| 17428 |
+
"learning_rate": 9.158415841584159e-07,
|
| 17429 |
+
"loss": 0.1264,
|
| 17430 |
+
"step": 2484
|
| 17431 |
+
},
|
| 17432 |
+
{
|
| 17433 |
+
"epoch": 4.930555555555555,
|
| 17434 |
+
"grad_norm": 19.704151153564453,
|
| 17435 |
+
"learning_rate": 8.910891089108911e-07,
|
| 17436 |
+
"loss": 0.1114,
|
| 17437 |
+
"step": 2485
|
| 17438 |
+
},
|
| 17439 |
+
{
|
| 17440 |
+
"epoch": 4.932539682539683,
|
| 17441 |
+
"grad_norm": 412.3170166015625,
|
| 17442 |
+
"learning_rate": 8.663366336633664e-07,
|
| 17443 |
+
"loss": 0.1257,
|
| 17444 |
+
"step": 2486
|
| 17445 |
+
},
|
| 17446 |
+
{
|
| 17447 |
+
"epoch": 4.934523809523809,
|
| 17448 |
+
"grad_norm": 83.70069122314453,
|
| 17449 |
+
"learning_rate": 8.415841584158417e-07,
|
| 17450 |
+
"loss": 0.1189,
|
| 17451 |
+
"step": 2487
|
| 17452 |
+
},
|
| 17453 |
+
{
|
| 17454 |
+
"epoch": 4.936507936507937,
|
| 17455 |
+
"grad_norm": 80.68509674072266,
|
| 17456 |
+
"learning_rate": 8.168316831683169e-07,
|
| 17457 |
+
"loss": 0.0985,
|
| 17458 |
+
"step": 2488
|
| 17459 |
+
},
|
| 17460 |
+
{
|
| 17461 |
+
"epoch": 4.938492063492063,
|
| 17462 |
+
"grad_norm": 53.381324768066406,
|
| 17463 |
+
"learning_rate": 7.920792079207921e-07,
|
| 17464 |
+
"loss": 0.116,
|
| 17465 |
+
"step": 2489
|
| 17466 |
+
},
|
| 17467 |
+
{
|
| 17468 |
+
"epoch": 4.940476190476191,
|
| 17469 |
+
"grad_norm": 109.44908905029297,
|
| 17470 |
+
"learning_rate": 7.673267326732673e-07,
|
| 17471 |
+
"loss": 0.1109,
|
| 17472 |
+
"step": 2490
|
| 17473 |
+
},
|
| 17474 |
+
{
|
| 17475 |
+
"epoch": 4.942460317460317,
|
| 17476 |
+
"grad_norm": 357.83502197265625,
|
| 17477 |
+
"learning_rate": 7.425742574257426e-07,
|
| 17478 |
+
"loss": 0.1906,
|
| 17479 |
+
"step": 2491
|
| 17480 |
+
},
|
| 17481 |
+
{
|
| 17482 |
+
"epoch": 4.944444444444445,
|
| 17483 |
+
"grad_norm": 179.75930786132812,
|
| 17484 |
+
"learning_rate": 7.178217821782179e-07,
|
| 17485 |
+
"loss": 0.1094,
|
| 17486 |
+
"step": 2492
|
| 17487 |
+
},
|
| 17488 |
+
{
|
| 17489 |
+
"epoch": 4.946428571428571,
|
| 17490 |
+
"grad_norm": 76.05357360839844,
|
| 17491 |
+
"learning_rate": 6.930693069306931e-07,
|
| 17492 |
+
"loss": 0.0863,
|
| 17493 |
+
"step": 2493
|
| 17494 |
+
},
|
| 17495 |
+
{
|
| 17496 |
+
"epoch": 4.948412698412699,
|
| 17497 |
+
"grad_norm": 111.01102447509766,
|
| 17498 |
+
"learning_rate": 6.683168316831684e-07,
|
| 17499 |
+
"loss": 0.1116,
|
| 17500 |
+
"step": 2494
|
| 17501 |
+
},
|
| 17502 |
+
{
|
| 17503 |
+
"epoch": 4.950396825396825,
|
| 17504 |
+
"grad_norm": 154.5319061279297,
|
| 17505 |
+
"learning_rate": 6.435643564356436e-07,
|
| 17506 |
+
"loss": 0.0981,
|
| 17507 |
+
"step": 2495
|
| 17508 |
+
},
|
| 17509 |
+
{
|
| 17510 |
+
"epoch": 4.9523809523809526,
|
| 17511 |
+
"grad_norm": 228.2860107421875,
|
| 17512 |
+
"learning_rate": 6.188118811881188e-07,
|
| 17513 |
+
"loss": 0.0916,
|
| 17514 |
+
"step": 2496
|
| 17515 |
+
},
|
| 17516 |
+
{
|
| 17517 |
+
"epoch": 4.954365079365079,
|
| 17518 |
+
"grad_norm": 34.52415466308594,
|
| 17519 |
+
"learning_rate": 5.940594059405941e-07,
|
| 17520 |
+
"loss": 0.1154,
|
| 17521 |
+
"step": 2497
|
| 17522 |
+
},
|
| 17523 |
+
{
|
| 17524 |
+
"epoch": 4.9563492063492065,
|
| 17525 |
+
"grad_norm": 611.2803955078125,
|
| 17526 |
+
"learning_rate": 5.693069306930693e-07,
|
| 17527 |
+
"loss": 0.2476,
|
| 17528 |
+
"step": 2498
|
| 17529 |
+
},
|
| 17530 |
+
{
|
| 17531 |
+
"epoch": 4.958333333333333,
|
| 17532 |
+
"grad_norm": 140.14830017089844,
|
| 17533 |
+
"learning_rate": 5.445544554455445e-07,
|
| 17534 |
+
"loss": 0.1469,
|
| 17535 |
+
"step": 2499
|
| 17536 |
+
},
|
| 17537 |
+
{
|
| 17538 |
+
"epoch": 4.9603174603174605,
|
| 17539 |
+
"grad_norm": 370.1153564453125,
|
| 17540 |
+
"learning_rate": 5.198019801980199e-07,
|
| 17541 |
+
"loss": 0.1458,
|
| 17542 |
+
"step": 2500
|
| 17543 |
+
},
|
| 17544 |
+
{
|
| 17545 |
+
"epoch": 4.9603174603174605,
|
| 17546 |
+
"eval_loss": 0.0735430046916008,
|
| 17547 |
+
"eval_runtime": 5.0649,
|
| 17548 |
+
"eval_samples_per_second": 454.302,
|
| 17549 |
+
"eval_steps_per_second": 14.215,
|
| 17550 |
+
"step": 2500
|
| 17551 |
}
|
| 17552 |
],
|
| 17553 |
"logging_steps": 1,
|
|
|
|
| 17562 |
"early_stopping_threshold": 0.0
|
| 17563 |
},
|
| 17564 |
"attributes": {
|
| 17565 |
+
"early_stopping_patience_counter": 1
|
| 17566 |
}
|
| 17567 |
},
|
| 17568 |
"TrainerControl": {
|
|
|
|
| 17576 |
"attributes": {}
|
| 17577 |
}
|
| 17578 |
},
|
| 17579 |
+
"total_flos": 2.3953344573210624e+16,
|
| 17580 |
"train_batch_size": 32,
|
| 17581 |
"trial_name": null,
|
| 17582 |
"trial_params": null
|
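As a quick sanity check on this checkpoint, the log history above can be read back with a few lines of Python. This is a minimal sketch, not part of the repository: the local path is an assumption (adjust it to wherever the last-checkpoint/ folder is downloaded), and it only touches keys that appear in the diff above.

import json

# Minimal sketch: summarize the checkpoint's trainer_state.json.
# "last-checkpoint/trainer_state.json" is an assumed local path.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Training log entries carry "loss"; evaluation entries carry "eval_loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print("global_step:", state["global_step"])
print("best_model_checkpoint:", state["best_model_checkpoint"])
print("best_metric (eval_loss):", state["best_metric"])
print("latest eval_loss:", eval_logs[-1]["eval_loss"])

# grad_norm is very spiky in this run (roughly 0.4 up to ~9341),
# so list the steps where it crossed an arbitrary threshold.
spikes = [e["step"] for e in train_logs if e.get("grad_norm", 0.0) > 1000.0]
print("steps with grad_norm > 1000:", spikes)

Read this way, the step-2500 eval_loss (0.07354) is slightly above the stored best_metric, which is consistent with early_stopping_patience_counter advancing to 1 in this commit.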