Training in progress, step 6000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 91951912
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed9777fa1a77a56e9dd3241ee3dbd254aa1cf90fdeb5edfb1fcce31ac82e94fb
|
| 3 |
size 91951912
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 183991627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9637bb35b55b1113901601f4621b591b014ca6a043584b2d2d2c061ad16c07c2
|
| 3 |
size 183991627
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:098b29492211804ab324a36f37466821d948280bb74fce4ba895c03f13ecd878
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79980175536a1928569d00db36b920754a385741f60dcf609f0aba7a8a424e74
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -38596,6 +38596,3514 @@
|
|
| 38596 |
"eval_samples_per_second": 27.364,
|
| 38597 |
"eval_steps_per_second": 1.71,
|
| 38598 |
"step": 5500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38599 |
}
|
| 38600 |
],
|
| 38601 |
"logging_steps": 1,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.0006,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 6000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 38596 |
"eval_samples_per_second": 27.364,
|
| 38597 |
"eval_steps_per_second": 1.71,
|
| 38598 |
"step": 5500
|
| 38599 |
+
},
|
| 38600 |
+
{
|
| 38601 |
+
"epoch": 0.0005501,
|
| 38602 |
+
"grad_norm": 2.1657469272613525,
|
| 38603 |
+
"learning_rate": 5.5e-05,
|
| 38604 |
+
"loss": 1.375,
|
| 38605 |
+
"step": 5501
|
| 38606 |
+
},
|
| 38607 |
+
{
|
| 38608 |
+
"epoch": 0.0005502,
|
| 38609 |
+
"grad_norm": 2.278029680252075,
|
| 38610 |
+
"learning_rate": 5.5010000000000004e-05,
|
| 38611 |
+
"loss": 1.375,
|
| 38612 |
+
"step": 5502
|
| 38613 |
+
},
|
| 38614 |
+
{
|
| 38615 |
+
"epoch": 0.0005503,
|
| 38616 |
+
"grad_norm": 2.653407335281372,
|
| 38617 |
+
"learning_rate": 5.502e-05,
|
| 38618 |
+
"loss": 1.7002,
|
| 38619 |
+
"step": 5503
|
| 38620 |
+
},
|
| 38621 |
+
{
|
| 38622 |
+
"epoch": 0.0005504,
|
| 38623 |
+
"grad_norm": 2.2126805782318115,
|
| 38624 |
+
"learning_rate": 5.503e-05,
|
| 38625 |
+
"loss": 1.3184,
|
| 38626 |
+
"step": 5504
|
| 38627 |
+
},
|
| 38628 |
+
{
|
| 38629 |
+
"epoch": 0.0005505,
|
| 38630 |
+
"grad_norm": 2.2534098625183105,
|
| 38631 |
+
"learning_rate": 5.504e-05,
|
| 38632 |
+
"loss": 1.3574,
|
| 38633 |
+
"step": 5505
|
| 38634 |
+
},
|
| 38635 |
+
{
|
| 38636 |
+
"epoch": 0.0005506,
|
| 38637 |
+
"grad_norm": 2.7865779399871826,
|
| 38638 |
+
"learning_rate": 5.505e-05,
|
| 38639 |
+
"loss": 1.6641,
|
| 38640 |
+
"step": 5506
|
| 38641 |
+
},
|
| 38642 |
+
{
|
| 38643 |
+
"epoch": 0.0005507,
|
| 38644 |
+
"grad_norm": 3.0122759342193604,
|
| 38645 |
+
"learning_rate": 5.506e-05,
|
| 38646 |
+
"loss": 1.7051,
|
| 38647 |
+
"step": 5507
|
| 38648 |
+
},
|
| 38649 |
+
{
|
| 38650 |
+
"epoch": 0.0005508,
|
| 38651 |
+
"grad_norm": 2.089824914932251,
|
| 38652 |
+
"learning_rate": 5.507e-05,
|
| 38653 |
+
"loss": 1.2598,
|
| 38654 |
+
"step": 5508
|
| 38655 |
+
},
|
| 38656 |
+
{
|
| 38657 |
+
"epoch": 0.0005509,
|
| 38658 |
+
"grad_norm": 3.3949615955352783,
|
| 38659 |
+
"learning_rate": 5.508000000000001e-05,
|
| 38660 |
+
"loss": 1.8682,
|
| 38661 |
+
"step": 5509
|
| 38662 |
+
},
|
| 38663 |
+
{
|
| 38664 |
+
"epoch": 0.000551,
|
| 38665 |
+
"grad_norm": 5.518685817718506,
|
| 38666 |
+
"learning_rate": 5.5089999999999996e-05,
|
| 38667 |
+
"loss": 1.5371,
|
| 38668 |
+
"step": 5510
|
| 38669 |
+
},
|
| 38670 |
+
{
|
| 38671 |
+
"epoch": 0.0005511,
|
| 38672 |
+
"grad_norm": 3.192974328994751,
|
| 38673 |
+
"learning_rate": 5.51e-05,
|
| 38674 |
+
"loss": 1.6025,
|
| 38675 |
+
"step": 5511
|
| 38676 |
+
},
|
| 38677 |
+
{
|
| 38678 |
+
"epoch": 0.0005512,
|
| 38679 |
+
"grad_norm": 2.699589967727661,
|
| 38680 |
+
"learning_rate": 5.5110000000000006e-05,
|
| 38681 |
+
"loss": 1.4658,
|
| 38682 |
+
"step": 5512
|
| 38683 |
+
},
|
| 38684 |
+
{
|
| 38685 |
+
"epoch": 0.0005513,
|
| 38686 |
+
"grad_norm": 2.347198486328125,
|
| 38687 |
+
"learning_rate": 5.5119999999999994e-05,
|
| 38688 |
+
"loss": 1.4316,
|
| 38689 |
+
"step": 5513
|
| 38690 |
+
},
|
| 38691 |
+
{
|
| 38692 |
+
"epoch": 0.0005514,
|
| 38693 |
+
"grad_norm": 2.406765937805176,
|
| 38694 |
+
"learning_rate": 5.513e-05,
|
| 38695 |
+
"loss": 1.375,
|
| 38696 |
+
"step": 5514
|
| 38697 |
+
},
|
| 38698 |
+
{
|
| 38699 |
+
"epoch": 0.0005515,
|
| 38700 |
+
"grad_norm": 2.5467593669891357,
|
| 38701 |
+
"learning_rate": 5.5140000000000004e-05,
|
| 38702 |
+
"loss": 1.4297,
|
| 38703 |
+
"step": 5515
|
| 38704 |
+
},
|
| 38705 |
+
{
|
| 38706 |
+
"epoch": 0.0005516,
|
| 38707 |
+
"grad_norm": 2.204380512237549,
|
| 38708 |
+
"learning_rate": 5.5150000000000006e-05,
|
| 38709 |
+
"loss": 1.2383,
|
| 38710 |
+
"step": 5516
|
| 38711 |
+
},
|
| 38712 |
+
{
|
| 38713 |
+
"epoch": 0.0005517,
|
| 38714 |
+
"grad_norm": 2.2602663040161133,
|
| 38715 |
+
"learning_rate": 5.516e-05,
|
| 38716 |
+
"loss": 1.3105,
|
| 38717 |
+
"step": 5517
|
| 38718 |
+
},
|
| 38719 |
+
{
|
| 38720 |
+
"epoch": 0.0005518,
|
| 38721 |
+
"grad_norm": 4.868964672088623,
|
| 38722 |
+
"learning_rate": 5.517e-05,
|
| 38723 |
+
"loss": 1.9258,
|
| 38724 |
+
"step": 5518
|
| 38725 |
+
},
|
| 38726 |
+
{
|
| 38727 |
+
"epoch": 0.0005519,
|
| 38728 |
+
"grad_norm": 2.5829179286956787,
|
| 38729 |
+
"learning_rate": 5.5180000000000004e-05,
|
| 38730 |
+
"loss": 1.4307,
|
| 38731 |
+
"step": 5519
|
| 38732 |
+
},
|
| 38733 |
+
{
|
| 38734 |
+
"epoch": 0.000552,
|
| 38735 |
+
"grad_norm": 2.0237176418304443,
|
| 38736 |
+
"learning_rate": 5.519e-05,
|
| 38737 |
+
"loss": 1.2646,
|
| 38738 |
+
"step": 5520
|
| 38739 |
+
},
|
| 38740 |
+
{
|
| 38741 |
+
"epoch": 0.0005521,
|
| 38742 |
+
"grad_norm": 1.967319130897522,
|
| 38743 |
+
"learning_rate": 5.52e-05,
|
| 38744 |
+
"loss": 1.1914,
|
| 38745 |
+
"step": 5521
|
| 38746 |
+
},
|
| 38747 |
+
{
|
| 38748 |
+
"epoch": 0.0005522,
|
| 38749 |
+
"grad_norm": 2.0111095905303955,
|
| 38750 |
+
"learning_rate": 5.521e-05,
|
| 38751 |
+
"loss": 1.293,
|
| 38752 |
+
"step": 5522
|
| 38753 |
+
},
|
| 38754 |
+
{
|
| 38755 |
+
"epoch": 0.0005523,
|
| 38756 |
+
"grad_norm": 2.2081053256988525,
|
| 38757 |
+
"learning_rate": 5.522e-05,
|
| 38758 |
+
"loss": 1.418,
|
| 38759 |
+
"step": 5523
|
| 38760 |
+
},
|
| 38761 |
+
{
|
| 38762 |
+
"epoch": 0.0005524,
|
| 38763 |
+
"grad_norm": 2.613236904144287,
|
| 38764 |
+
"learning_rate": 5.523e-05,
|
| 38765 |
+
"loss": 1.4785,
|
| 38766 |
+
"step": 5524
|
| 38767 |
+
},
|
| 38768 |
+
{
|
| 38769 |
+
"epoch": 0.0005525,
|
| 38770 |
+
"grad_norm": 3.4059560298919678,
|
| 38771 |
+
"learning_rate": 5.524e-05,
|
| 38772 |
+
"loss": 1.6406,
|
| 38773 |
+
"step": 5525
|
| 38774 |
+
},
|
| 38775 |
+
{
|
| 38776 |
+
"epoch": 0.0005526,
|
| 38777 |
+
"grad_norm": 2.1261260509490967,
|
| 38778 |
+
"learning_rate": 5.525000000000001e-05,
|
| 38779 |
+
"loss": 1.3711,
|
| 38780 |
+
"step": 5526
|
| 38781 |
+
},
|
| 38782 |
+
{
|
| 38783 |
+
"epoch": 0.0005527,
|
| 38784 |
+
"grad_norm": 3.0109899044036865,
|
| 38785 |
+
"learning_rate": 5.5259999999999996e-05,
|
| 38786 |
+
"loss": 1.4688,
|
| 38787 |
+
"step": 5527
|
| 38788 |
+
},
|
| 38789 |
+
{
|
| 38790 |
+
"epoch": 0.0005528,
|
| 38791 |
+
"grad_norm": 2.294259786605835,
|
| 38792 |
+
"learning_rate": 5.527e-05,
|
| 38793 |
+
"loss": 1.4619,
|
| 38794 |
+
"step": 5528
|
| 38795 |
+
},
|
| 38796 |
+
{
|
| 38797 |
+
"epoch": 0.0005529,
|
| 38798 |
+
"grad_norm": 2.180101156234741,
|
| 38799 |
+
"learning_rate": 5.5280000000000006e-05,
|
| 38800 |
+
"loss": 1.4219,
|
| 38801 |
+
"step": 5529
|
| 38802 |
+
},
|
| 38803 |
+
{
|
| 38804 |
+
"epoch": 0.000553,
|
| 38805 |
+
"grad_norm": 2.065366268157959,
|
| 38806 |
+
"learning_rate": 5.5289999999999994e-05,
|
| 38807 |
+
"loss": 1.3633,
|
| 38808 |
+
"step": 5530
|
| 38809 |
+
},
|
| 38810 |
+
{
|
| 38811 |
+
"epoch": 0.0005531,
|
| 38812 |
+
"grad_norm": 2.6592462062835693,
|
| 38813 |
+
"learning_rate": 5.53e-05,
|
| 38814 |
+
"loss": 1.2856,
|
| 38815 |
+
"step": 5531
|
| 38816 |
+
},
|
| 38817 |
+
{
|
| 38818 |
+
"epoch": 0.0005532,
|
| 38819 |
+
"grad_norm": 2.605691432952881,
|
| 38820 |
+
"learning_rate": 5.5310000000000004e-05,
|
| 38821 |
+
"loss": 1.498,
|
| 38822 |
+
"step": 5532
|
| 38823 |
+
},
|
| 38824 |
+
{
|
| 38825 |
+
"epoch": 0.0005533,
|
| 38826 |
+
"grad_norm": 2.811755418777466,
|
| 38827 |
+
"learning_rate": 5.5320000000000006e-05,
|
| 38828 |
+
"loss": 1.7549,
|
| 38829 |
+
"step": 5533
|
| 38830 |
+
},
|
| 38831 |
+
{
|
| 38832 |
+
"epoch": 0.0005534,
|
| 38833 |
+
"grad_norm": 2.2970316410064697,
|
| 38834 |
+
"learning_rate": 5.533e-05,
|
| 38835 |
+
"loss": 1.4023,
|
| 38836 |
+
"step": 5534
|
| 38837 |
+
},
|
| 38838 |
+
{
|
| 38839 |
+
"epoch": 0.0005535,
|
| 38840 |
+
"grad_norm": 2.2041218280792236,
|
| 38841 |
+
"learning_rate": 5.534e-05,
|
| 38842 |
+
"loss": 1.4492,
|
| 38843 |
+
"step": 5535
|
| 38844 |
+
},
|
| 38845 |
+
{
|
| 38846 |
+
"epoch": 0.0005536,
|
| 38847 |
+
"grad_norm": 3.6363894939422607,
|
| 38848 |
+
"learning_rate": 5.5350000000000004e-05,
|
| 38849 |
+
"loss": 2.0703,
|
| 38850 |
+
"step": 5536
|
| 38851 |
+
},
|
| 38852 |
+
{
|
| 38853 |
+
"epoch": 0.0005537,
|
| 38854 |
+
"grad_norm": 2.3322548866271973,
|
| 38855 |
+
"learning_rate": 5.536e-05,
|
| 38856 |
+
"loss": 1.3018,
|
| 38857 |
+
"step": 5537
|
| 38858 |
+
},
|
| 38859 |
+
{
|
| 38860 |
+
"epoch": 0.0005538,
|
| 38861 |
+
"grad_norm": 2.0842840671539307,
|
| 38862 |
+
"learning_rate": 5.537e-05,
|
| 38863 |
+
"loss": 1.3398,
|
| 38864 |
+
"step": 5538
|
| 38865 |
+
},
|
| 38866 |
+
{
|
| 38867 |
+
"epoch": 0.0005539,
|
| 38868 |
+
"grad_norm": 2.181790828704834,
|
| 38869 |
+
"learning_rate": 5.538e-05,
|
| 38870 |
+
"loss": 1.333,
|
| 38871 |
+
"step": 5539
|
| 38872 |
+
},
|
| 38873 |
+
{
|
| 38874 |
+
"epoch": 0.000554,
|
| 38875 |
+
"grad_norm": 2.298056125640869,
|
| 38876 |
+
"learning_rate": 5.539e-05,
|
| 38877 |
+
"loss": 1.416,
|
| 38878 |
+
"step": 5540
|
| 38879 |
+
},
|
| 38880 |
+
{
|
| 38881 |
+
"epoch": 0.0005541,
|
| 38882 |
+
"grad_norm": 3.197896718978882,
|
| 38883 |
+
"learning_rate": 5.54e-05,
|
| 38884 |
+
"loss": 1.2461,
|
| 38885 |
+
"step": 5541
|
| 38886 |
+
},
|
| 38887 |
+
{
|
| 38888 |
+
"epoch": 0.0005542,
|
| 38889 |
+
"grad_norm": 2.2699851989746094,
|
| 38890 |
+
"learning_rate": 5.541e-05,
|
| 38891 |
+
"loss": 1.2783,
|
| 38892 |
+
"step": 5542
|
| 38893 |
+
},
|
| 38894 |
+
{
|
| 38895 |
+
"epoch": 0.0005543,
|
| 38896 |
+
"grad_norm": 2.420722484588623,
|
| 38897 |
+
"learning_rate": 5.542000000000001e-05,
|
| 38898 |
+
"loss": 1.5762,
|
| 38899 |
+
"step": 5543
|
| 38900 |
+
},
|
| 38901 |
+
{
|
| 38902 |
+
"epoch": 0.0005544,
|
| 38903 |
+
"grad_norm": 2.019577980041504,
|
| 38904 |
+
"learning_rate": 5.5429999999999996e-05,
|
| 38905 |
+
"loss": 1.2617,
|
| 38906 |
+
"step": 5544
|
| 38907 |
+
},
|
| 38908 |
+
{
|
| 38909 |
+
"epoch": 0.0005545,
|
| 38910 |
+
"grad_norm": 2.4632229804992676,
|
| 38911 |
+
"learning_rate": 5.544e-05,
|
| 38912 |
+
"loss": 1.396,
|
| 38913 |
+
"step": 5545
|
| 38914 |
+
},
|
| 38915 |
+
{
|
| 38916 |
+
"epoch": 0.0005546,
|
| 38917 |
+
"grad_norm": 2.2937636375427246,
|
| 38918 |
+
"learning_rate": 5.5450000000000006e-05,
|
| 38919 |
+
"loss": 1.3311,
|
| 38920 |
+
"step": 5546
|
| 38921 |
+
},
|
| 38922 |
+
{
|
| 38923 |
+
"epoch": 0.0005547,
|
| 38924 |
+
"grad_norm": 2.050861120223999,
|
| 38925 |
+
"learning_rate": 5.5459999999999994e-05,
|
| 38926 |
+
"loss": 1.2666,
|
| 38927 |
+
"step": 5547
|
| 38928 |
+
},
|
| 38929 |
+
{
|
| 38930 |
+
"epoch": 0.0005548,
|
| 38931 |
+
"grad_norm": 2.0719165802001953,
|
| 38932 |
+
"learning_rate": 5.547e-05,
|
| 38933 |
+
"loss": 1.3037,
|
| 38934 |
+
"step": 5548
|
| 38935 |
+
},
|
| 38936 |
+
{
|
| 38937 |
+
"epoch": 0.0005549,
|
| 38938 |
+
"grad_norm": 2.0907227993011475,
|
| 38939 |
+
"learning_rate": 5.5480000000000004e-05,
|
| 38940 |
+
"loss": 1.2842,
|
| 38941 |
+
"step": 5549
|
| 38942 |
+
},
|
| 38943 |
+
{
|
| 38944 |
+
"epoch": 0.000555,
|
| 38945 |
+
"grad_norm": 2.092015504837036,
|
| 38946 |
+
"learning_rate": 5.5490000000000006e-05,
|
| 38947 |
+
"loss": 1.2529,
|
| 38948 |
+
"step": 5550
|
| 38949 |
+
},
|
| 38950 |
+
{
|
| 38951 |
+
"epoch": 0.0005551,
|
| 38952 |
+
"grad_norm": 2.0380799770355225,
|
| 38953 |
+
"learning_rate": 5.55e-05,
|
| 38954 |
+
"loss": 1.2295,
|
| 38955 |
+
"step": 5551
|
| 38956 |
+
},
|
| 38957 |
+
{
|
| 38958 |
+
"epoch": 0.0005552,
|
| 38959 |
+
"grad_norm": 2.0194711685180664,
|
| 38960 |
+
"learning_rate": 5.551e-05,
|
| 38961 |
+
"loss": 1.1826,
|
| 38962 |
+
"step": 5552
|
| 38963 |
+
},
|
| 38964 |
+
{
|
| 38965 |
+
"epoch": 0.0005553,
|
| 38966 |
+
"grad_norm": 1.984717845916748,
|
| 38967 |
+
"learning_rate": 5.5520000000000004e-05,
|
| 38968 |
+
"loss": 1.2383,
|
| 38969 |
+
"step": 5553
|
| 38970 |
+
},
|
| 38971 |
+
{
|
| 38972 |
+
"epoch": 0.0005554,
|
| 38973 |
+
"grad_norm": 1.9688911437988281,
|
| 38974 |
+
"learning_rate": 5.553e-05,
|
| 38975 |
+
"loss": 1.2822,
|
| 38976 |
+
"step": 5554
|
| 38977 |
+
},
|
| 38978 |
+
{
|
| 38979 |
+
"epoch": 0.0005555,
|
| 38980 |
+
"grad_norm": 3.0318920612335205,
|
| 38981 |
+
"learning_rate": 5.554e-05,
|
| 38982 |
+
"loss": 1.8086,
|
| 38983 |
+
"step": 5555
|
| 38984 |
+
},
|
| 38985 |
+
{
|
| 38986 |
+
"epoch": 0.0005556,
|
| 38987 |
+
"grad_norm": 2.0116493701934814,
|
| 38988 |
+
"learning_rate": 5.555e-05,
|
| 38989 |
+
"loss": 1.1943,
|
| 38990 |
+
"step": 5556
|
| 38991 |
+
},
|
| 38992 |
+
{
|
| 38993 |
+
"epoch": 0.0005557,
|
| 38994 |
+
"grad_norm": 2.2734971046447754,
|
| 38995 |
+
"learning_rate": 5.5559999999999997e-05,
|
| 38996 |
+
"loss": 1.4131,
|
| 38997 |
+
"step": 5557
|
| 38998 |
+
},
|
| 38999 |
+
{
|
| 39000 |
+
"epoch": 0.0005558,
|
| 39001 |
+
"grad_norm": 2.3707101345062256,
|
| 39002 |
+
"learning_rate": 5.557e-05,
|
| 39003 |
+
"loss": 1.3682,
|
| 39004 |
+
"step": 5558
|
| 39005 |
+
},
|
| 39006 |
+
{
|
| 39007 |
+
"epoch": 0.0005559,
|
| 39008 |
+
"grad_norm": 1.9980303049087524,
|
| 39009 |
+
"learning_rate": 5.558e-05,
|
| 39010 |
+
"loss": 1.2549,
|
| 39011 |
+
"step": 5559
|
| 39012 |
+
},
|
| 39013 |
+
{
|
| 39014 |
+
"epoch": 0.000556,
|
| 39015 |
+
"grad_norm": 2.1238646507263184,
|
| 39016 |
+
"learning_rate": 5.559000000000001e-05,
|
| 39017 |
+
"loss": 1.2539,
|
| 39018 |
+
"step": 5560
|
| 39019 |
+
},
|
| 39020 |
+
{
|
| 39021 |
+
"epoch": 0.0005561,
|
| 39022 |
+
"grad_norm": 2.268760919570923,
|
| 39023 |
+
"learning_rate": 5.5599999999999996e-05,
|
| 39024 |
+
"loss": 1.4111,
|
| 39025 |
+
"step": 5561
|
| 39026 |
+
},
|
| 39027 |
+
{
|
| 39028 |
+
"epoch": 0.0005562,
|
| 39029 |
+
"grad_norm": 2.3243017196655273,
|
| 39030 |
+
"learning_rate": 5.561e-05,
|
| 39031 |
+
"loss": 1.373,
|
| 39032 |
+
"step": 5562
|
| 39033 |
+
},
|
| 39034 |
+
{
|
| 39035 |
+
"epoch": 0.0005563,
|
| 39036 |
+
"grad_norm": 2.0662007331848145,
|
| 39037 |
+
"learning_rate": 5.5620000000000006e-05,
|
| 39038 |
+
"loss": 1.3096,
|
| 39039 |
+
"step": 5563
|
| 39040 |
+
},
|
| 39041 |
+
{
|
| 39042 |
+
"epoch": 0.0005564,
|
| 39043 |
+
"grad_norm": 2.231890916824341,
|
| 39044 |
+
"learning_rate": 5.5629999999999994e-05,
|
| 39045 |
+
"loss": 1.373,
|
| 39046 |
+
"step": 5564
|
| 39047 |
+
},
|
| 39048 |
+
{
|
| 39049 |
+
"epoch": 0.0005565,
|
| 39050 |
+
"grad_norm": 2.1430857181549072,
|
| 39051 |
+
"learning_rate": 5.564e-05,
|
| 39052 |
+
"loss": 1.1699,
|
| 39053 |
+
"step": 5565
|
| 39054 |
+
},
|
| 39055 |
+
{
|
| 39056 |
+
"epoch": 0.0005566,
|
| 39057 |
+
"grad_norm": 1.9390007257461548,
|
| 39058 |
+
"learning_rate": 5.5650000000000004e-05,
|
| 39059 |
+
"loss": 1.2163,
|
| 39060 |
+
"step": 5566
|
| 39061 |
+
},
|
| 39062 |
+
{
|
| 39063 |
+
"epoch": 0.0005567,
|
| 39064 |
+
"grad_norm": 3.218642473220825,
|
| 39065 |
+
"learning_rate": 5.5660000000000006e-05,
|
| 39066 |
+
"loss": 1.9434,
|
| 39067 |
+
"step": 5567
|
| 39068 |
+
},
|
| 39069 |
+
{
|
| 39070 |
+
"epoch": 0.0005568,
|
| 39071 |
+
"grad_norm": 2.7889389991760254,
|
| 39072 |
+
"learning_rate": 5.567e-05,
|
| 39073 |
+
"loss": 1.7451,
|
| 39074 |
+
"step": 5568
|
| 39075 |
+
},
|
| 39076 |
+
{
|
| 39077 |
+
"epoch": 0.0005569,
|
| 39078 |
+
"grad_norm": 2.129279613494873,
|
| 39079 |
+
"learning_rate": 5.568e-05,
|
| 39080 |
+
"loss": 1.2842,
|
| 39081 |
+
"step": 5569
|
| 39082 |
+
},
|
| 39083 |
+
{
|
| 39084 |
+
"epoch": 0.000557,
|
| 39085 |
+
"grad_norm": 2.407432794570923,
|
| 39086 |
+
"learning_rate": 5.5690000000000004e-05,
|
| 39087 |
+
"loss": 1.4795,
|
| 39088 |
+
"step": 5570
|
| 39089 |
+
},
|
| 39090 |
+
{
|
| 39091 |
+
"epoch": 0.0005571,
|
| 39092 |
+
"grad_norm": 2.932892084121704,
|
| 39093 |
+
"learning_rate": 5.57e-05,
|
| 39094 |
+
"loss": 1.2832,
|
| 39095 |
+
"step": 5571
|
| 39096 |
+
},
|
| 39097 |
+
{
|
| 39098 |
+
"epoch": 0.0005572,
|
| 39099 |
+
"grad_norm": 2.2704365253448486,
|
| 39100 |
+
"learning_rate": 5.571e-05,
|
| 39101 |
+
"loss": 1.3706,
|
| 39102 |
+
"step": 5572
|
| 39103 |
+
},
|
| 39104 |
+
{
|
| 39105 |
+
"epoch": 0.0005573,
|
| 39106 |
+
"grad_norm": 2.6265451908111572,
|
| 39107 |
+
"learning_rate": 5.572e-05,
|
| 39108 |
+
"loss": 1.2607,
|
| 39109 |
+
"step": 5573
|
| 39110 |
+
},
|
| 39111 |
+
{
|
| 39112 |
+
"epoch": 0.0005574,
|
| 39113 |
+
"grad_norm": 2.1422863006591797,
|
| 39114 |
+
"learning_rate": 5.573e-05,
|
| 39115 |
+
"loss": 1.2617,
|
| 39116 |
+
"step": 5574
|
| 39117 |
+
},
|
| 39118 |
+
{
|
| 39119 |
+
"epoch": 0.0005575,
|
| 39120 |
+
"grad_norm": 2.9647858142852783,
|
| 39121 |
+
"learning_rate": 5.574e-05,
|
| 39122 |
+
"loss": 1.3901,
|
| 39123 |
+
"step": 5575
|
| 39124 |
+
},
|
| 39125 |
+
{
|
| 39126 |
+
"epoch": 0.0005576,
|
| 39127 |
+
"grad_norm": 2.489037036895752,
|
| 39128 |
+
"learning_rate": 5.575e-05,
|
| 39129 |
+
"loss": 1.3994,
|
| 39130 |
+
"step": 5576
|
| 39131 |
+
},
|
| 39132 |
+
{
|
| 39133 |
+
"epoch": 0.0005577,
|
| 39134 |
+
"grad_norm": 2.4596824645996094,
|
| 39135 |
+
"learning_rate": 5.576000000000001e-05,
|
| 39136 |
+
"loss": 1.3701,
|
| 39137 |
+
"step": 5577
|
| 39138 |
+
},
|
| 39139 |
+
{
|
| 39140 |
+
"epoch": 0.0005578,
|
| 39141 |
+
"grad_norm": 2.2265207767486572,
|
| 39142 |
+
"learning_rate": 5.5769999999999996e-05,
|
| 39143 |
+
"loss": 1.248,
|
| 39144 |
+
"step": 5578
|
| 39145 |
+
},
|
| 39146 |
+
{
|
| 39147 |
+
"epoch": 0.0005579,
|
| 39148 |
+
"grad_norm": 2.3037335872650146,
|
| 39149 |
+
"learning_rate": 5.578e-05,
|
| 39150 |
+
"loss": 1.333,
|
| 39151 |
+
"step": 5579
|
| 39152 |
+
},
|
| 39153 |
+
{
|
| 39154 |
+
"epoch": 0.000558,
|
| 39155 |
+
"grad_norm": 2.651296615600586,
|
| 39156 |
+
"learning_rate": 5.5790000000000006e-05,
|
| 39157 |
+
"loss": 1.4697,
|
| 39158 |
+
"step": 5580
|
| 39159 |
+
},
|
| 39160 |
+
{
|
| 39161 |
+
"epoch": 0.0005581,
|
| 39162 |
+
"grad_norm": 2.3579533100128174,
|
| 39163 |
+
"learning_rate": 5.5799999999999994e-05,
|
| 39164 |
+
"loss": 1.3623,
|
| 39165 |
+
"step": 5581
|
| 39166 |
+
},
|
| 39167 |
+
{
|
| 39168 |
+
"epoch": 0.0005582,
|
| 39169 |
+
"grad_norm": 2.523846387863159,
|
| 39170 |
+
"learning_rate": 5.581e-05,
|
| 39171 |
+
"loss": 1.415,
|
| 39172 |
+
"step": 5582
|
| 39173 |
+
},
|
| 39174 |
+
{
|
| 39175 |
+
"epoch": 0.0005583,
|
| 39176 |
+
"grad_norm": 2.5580360889434814,
|
| 39177 |
+
"learning_rate": 5.5820000000000004e-05,
|
| 39178 |
+
"loss": 1.4512,
|
| 39179 |
+
"step": 5583
|
| 39180 |
+
},
|
| 39181 |
+
{
|
| 39182 |
+
"epoch": 0.0005584,
|
| 39183 |
+
"grad_norm": 2.344586133956909,
|
| 39184 |
+
"learning_rate": 5.5830000000000006e-05,
|
| 39185 |
+
"loss": 1.3662,
|
| 39186 |
+
"step": 5584
|
| 39187 |
+
},
|
| 39188 |
+
{
|
| 39189 |
+
"epoch": 0.0005585,
|
| 39190 |
+
"grad_norm": 2.040177822113037,
|
| 39191 |
+
"learning_rate": 5.584e-05,
|
| 39192 |
+
"loss": 1.2173,
|
| 39193 |
+
"step": 5585
|
| 39194 |
+
},
|
| 39195 |
+
{
|
| 39196 |
+
"epoch": 0.0005586,
|
| 39197 |
+
"grad_norm": 1.9751789569854736,
|
| 39198 |
+
"learning_rate": 5.585e-05,
|
| 39199 |
+
"loss": 1.1479,
|
| 39200 |
+
"step": 5586
|
| 39201 |
+
},
|
| 39202 |
+
{
|
| 39203 |
+
"epoch": 0.0005587,
|
| 39204 |
+
"grad_norm": 2.2699475288391113,
|
| 39205 |
+
"learning_rate": 5.5860000000000004e-05,
|
| 39206 |
+
"loss": 1.2803,
|
| 39207 |
+
"step": 5587
|
| 39208 |
+
},
|
| 39209 |
+
{
|
| 39210 |
+
"epoch": 0.0005588,
|
| 39211 |
+
"grad_norm": 2.3292815685272217,
|
| 39212 |
+
"learning_rate": 5.587e-05,
|
| 39213 |
+
"loss": 1.3145,
|
| 39214 |
+
"step": 5588
|
| 39215 |
+
},
|
| 39216 |
+
{
|
| 39217 |
+
"epoch": 0.0005589,
|
| 39218 |
+
"grad_norm": 2.4007952213287354,
|
| 39219 |
+
"learning_rate": 5.588e-05,
|
| 39220 |
+
"loss": 1.2822,
|
| 39221 |
+
"step": 5589
|
| 39222 |
+
},
|
| 39223 |
+
{
|
| 39224 |
+
"epoch": 0.000559,
|
| 39225 |
+
"grad_norm": 2.313913345336914,
|
| 39226 |
+
"learning_rate": 5.589e-05,
|
| 39227 |
+
"loss": 1.2842,
|
| 39228 |
+
"step": 5590
|
| 39229 |
+
},
|
| 39230 |
+
{
|
| 39231 |
+
"epoch": 0.0005591,
|
| 39232 |
+
"grad_norm": 2.481891632080078,
|
| 39233 |
+
"learning_rate": 5.59e-05,
|
| 39234 |
+
"loss": 1.5146,
|
| 39235 |
+
"step": 5591
|
| 39236 |
+
},
|
| 39237 |
+
{
|
| 39238 |
+
"epoch": 0.0005592,
|
| 39239 |
+
"grad_norm": 2.1890406608581543,
|
| 39240 |
+
"learning_rate": 5.591e-05,
|
| 39241 |
+
"loss": 1.2607,
|
| 39242 |
+
"step": 5592
|
| 39243 |
+
},
|
| 39244 |
+
{
|
| 39245 |
+
"epoch": 0.0005593,
|
| 39246 |
+
"grad_norm": 2.6850531101226807,
|
| 39247 |
+
"learning_rate": 5.592e-05,
|
| 39248 |
+
"loss": 1.4482,
|
| 39249 |
+
"step": 5593
|
| 39250 |
+
},
|
| 39251 |
+
{
|
| 39252 |
+
"epoch": 0.0005594,
|
| 39253 |
+
"grad_norm": 2.2254996299743652,
|
| 39254 |
+
"learning_rate": 5.593000000000001e-05,
|
| 39255 |
+
"loss": 1.291,
|
| 39256 |
+
"step": 5594
|
| 39257 |
+
},
|
| 39258 |
+
{
|
| 39259 |
+
"epoch": 0.0005595,
|
| 39260 |
+
"grad_norm": 2.0163609981536865,
|
| 39261 |
+
"learning_rate": 5.5939999999999996e-05,
|
| 39262 |
+
"loss": 1.2124,
|
| 39263 |
+
"step": 5595
|
| 39264 |
+
},
|
| 39265 |
+
{
|
| 39266 |
+
"epoch": 0.0005596,
|
| 39267 |
+
"grad_norm": 2.391115188598633,
|
| 39268 |
+
"learning_rate": 5.595e-05,
|
| 39269 |
+
"loss": 1.4141,
|
| 39270 |
+
"step": 5596
|
| 39271 |
+
},
|
| 39272 |
+
{
|
| 39273 |
+
"epoch": 0.0005597,
|
| 39274 |
+
"grad_norm": 2.7599549293518066,
|
| 39275 |
+
"learning_rate": 5.5960000000000006e-05,
|
| 39276 |
+
"loss": 1.5264,
|
| 39277 |
+
"step": 5597
|
| 39278 |
+
},
|
| 39279 |
+
{
|
| 39280 |
+
"epoch": 0.0005598,
|
| 39281 |
+
"grad_norm": 2.434512138366699,
|
| 39282 |
+
"learning_rate": 5.5969999999999994e-05,
|
| 39283 |
+
"loss": 1.3662,
|
| 39284 |
+
"step": 5598
|
| 39285 |
+
},
|
| 39286 |
+
{
|
| 39287 |
+
"epoch": 0.0005599,
|
| 39288 |
+
"grad_norm": 2.0736303329467773,
|
| 39289 |
+
"learning_rate": 5.598e-05,
|
| 39290 |
+
"loss": 1.1714,
|
| 39291 |
+
"step": 5599
|
| 39292 |
+
},
|
| 39293 |
+
{
|
| 39294 |
+
"epoch": 0.00056,
|
| 39295 |
+
"grad_norm": 2.0648913383483887,
|
| 39296 |
+
"learning_rate": 5.5990000000000004e-05,
|
| 39297 |
+
"loss": 1.1797,
|
| 39298 |
+
"step": 5600
|
| 39299 |
+
},
|
| 39300 |
+
{
|
| 39301 |
+
"epoch": 0.0005601,
|
| 39302 |
+
"grad_norm": 4.771807670593262,
|
| 39303 |
+
"learning_rate": 5.6000000000000006e-05,
|
| 39304 |
+
"loss": 1.6016,
|
| 39305 |
+
"step": 5601
|
| 39306 |
+
},
|
| 39307 |
+
{
|
| 39308 |
+
"epoch": 0.0005602,
|
| 39309 |
+
"grad_norm": 2.7045605182647705,
|
| 39310 |
+
"learning_rate": 5.601e-05,
|
| 39311 |
+
"loss": 1.4512,
|
| 39312 |
+
"step": 5602
|
| 39313 |
+
},
|
| 39314 |
+
{
|
| 39315 |
+
"epoch": 0.0005603,
|
| 39316 |
+
"grad_norm": 2.5183730125427246,
|
| 39317 |
+
"learning_rate": 5.602e-05,
|
| 39318 |
+
"loss": 1.376,
|
| 39319 |
+
"step": 5603
|
| 39320 |
+
},
|
| 39321 |
+
{
|
| 39322 |
+
"epoch": 0.0005604,
|
| 39323 |
+
"grad_norm": 2.135667085647583,
|
| 39324 |
+
"learning_rate": 5.6030000000000004e-05,
|
| 39325 |
+
"loss": 1.3174,
|
| 39326 |
+
"step": 5604
|
| 39327 |
+
},
|
| 39328 |
+
{
|
| 39329 |
+
"epoch": 0.0005605,
|
| 39330 |
+
"grad_norm": 2.1617133617401123,
|
| 39331 |
+
"learning_rate": 5.604e-05,
|
| 39332 |
+
"loss": 1.248,
|
| 39333 |
+
"step": 5605
|
| 39334 |
+
},
|
| 39335 |
+
{
|
| 39336 |
+
"epoch": 0.0005606,
|
| 39337 |
+
"grad_norm": 2.7303905487060547,
|
| 39338 |
+
"learning_rate": 5.605e-05,
|
| 39339 |
+
"loss": 1.3955,
|
| 39340 |
+
"step": 5606
|
| 39341 |
+
},
|
| 39342 |
+
{
|
| 39343 |
+
"epoch": 0.0005607,
|
| 39344 |
+
"grad_norm": 2.1086387634277344,
|
| 39345 |
+
"learning_rate": 5.606e-05,
|
| 39346 |
+
"loss": 1.3164,
|
| 39347 |
+
"step": 5607
|
| 39348 |
+
},
|
| 39349 |
+
{
|
| 39350 |
+
"epoch": 0.0005608,
|
| 39351 |
+
"grad_norm": 2.3741819858551025,
|
| 39352 |
+
"learning_rate": 5.607e-05,
|
| 39353 |
+
"loss": 1.4766,
|
| 39354 |
+
"step": 5608
|
| 39355 |
+
},
|
| 39356 |
+
{
|
| 39357 |
+
"epoch": 0.0005609,
|
| 39358 |
+
"grad_norm": 2.0160882472991943,
|
| 39359 |
+
"learning_rate": 5.608e-05,
|
| 39360 |
+
"loss": 1.2295,
|
| 39361 |
+
"step": 5609
|
| 39362 |
+
},
|
| 39363 |
+
{
|
| 39364 |
+
"epoch": 0.000561,
|
| 39365 |
+
"grad_norm": 1.9973747730255127,
|
| 39366 |
+
"learning_rate": 5.609e-05,
|
| 39367 |
+
"loss": 1.1104,
|
| 39368 |
+
"step": 5610
|
| 39369 |
+
},
|
| 39370 |
+
{
|
| 39371 |
+
"epoch": 0.0005611,
|
| 39372 |
+
"grad_norm": 2.368208885192871,
|
| 39373 |
+
"learning_rate": 5.610000000000001e-05,
|
| 39374 |
+
"loss": 1.6182,
|
| 39375 |
+
"step": 5611
|
| 39376 |
+
},
|
| 39377 |
+
{
|
| 39378 |
+
"epoch": 0.0005612,
|
| 39379 |
+
"grad_norm": 3.3395214080810547,
|
| 39380 |
+
"learning_rate": 5.6109999999999996e-05,
|
| 39381 |
+
"loss": 2.1504,
|
| 39382 |
+
"step": 5612
|
| 39383 |
+
},
|
| 39384 |
+
{
|
| 39385 |
+
"epoch": 0.0005613,
|
| 39386 |
+
"grad_norm": 2.3721203804016113,
|
| 39387 |
+
"learning_rate": 5.612e-05,
|
| 39388 |
+
"loss": 1.3037,
|
| 39389 |
+
"step": 5613
|
| 39390 |
+
},
|
| 39391 |
+
{
|
| 39392 |
+
"epoch": 0.0005614,
|
| 39393 |
+
"grad_norm": 2.190359115600586,
|
| 39394 |
+
"learning_rate": 5.6130000000000006e-05,
|
| 39395 |
+
"loss": 1.167,
|
| 39396 |
+
"step": 5614
|
| 39397 |
+
},
|
| 39398 |
+
{
|
| 39399 |
+
"epoch": 0.0005615,
|
| 39400 |
+
"grad_norm": 2.519402503967285,
|
| 39401 |
+
"learning_rate": 5.6139999999999994e-05,
|
| 39402 |
+
"loss": 1.3716,
|
| 39403 |
+
"step": 5615
|
| 39404 |
+
},
|
| 39405 |
+
{
|
| 39406 |
+
"epoch": 0.0005616,
|
| 39407 |
+
"grad_norm": 2.859174966812134,
|
| 39408 |
+
"learning_rate": 5.615e-05,
|
| 39409 |
+
"loss": 1.5605,
|
| 39410 |
+
"step": 5616
|
| 39411 |
+
},
|
| 39412 |
+
{
|
| 39413 |
+
"epoch": 0.0005617,
|
| 39414 |
+
"grad_norm": 2.296678304672241,
|
| 39415 |
+
"learning_rate": 5.6160000000000004e-05,
|
| 39416 |
+
"loss": 1.1484,
|
| 39417 |
+
"step": 5617
|
| 39418 |
+
},
|
| 39419 |
+
{
|
| 39420 |
+
"epoch": 0.0005618,
|
| 39421 |
+
"grad_norm": 2.1283135414123535,
|
| 39422 |
+
"learning_rate": 5.6170000000000006e-05,
|
| 39423 |
+
"loss": 1.1885,
|
| 39424 |
+
"step": 5618
|
| 39425 |
+
},
|
| 39426 |
+
{
|
| 39427 |
+
"epoch": 0.0005619,
|
| 39428 |
+
"grad_norm": 2.090508222579956,
|
| 39429 |
+
"learning_rate": 5.618e-05,
|
| 39430 |
+
"loss": 1.2754,
|
| 39431 |
+
"step": 5619
|
| 39432 |
+
},
|
| 39433 |
+
{
|
| 39434 |
+
"epoch": 0.000562,
|
| 39435 |
+
"grad_norm": 2.2708656787872314,
|
| 39436 |
+
"learning_rate": 5.619e-05,
|
| 39437 |
+
"loss": 1.458,
|
| 39438 |
+
"step": 5620
|
| 39439 |
+
},
|
| 39440 |
+
{
|
| 39441 |
+
"epoch": 0.0005621,
|
| 39442 |
+
"grad_norm": 1.9705712795257568,
|
| 39443 |
+
"learning_rate": 5.6200000000000004e-05,
|
| 39444 |
+
"loss": 1.082,
|
| 39445 |
+
"step": 5621
|
| 39446 |
+
},
|
| 39447 |
+
{
|
| 39448 |
+
"epoch": 0.0005622,
|
| 39449 |
+
"grad_norm": 1.862807273864746,
|
| 39450 |
+
"learning_rate": 5.621e-05,
|
| 39451 |
+
"loss": 1.1211,
|
| 39452 |
+
"step": 5622
|
| 39453 |
+
},
|
| 39454 |
+
{
|
| 39455 |
+
"epoch": 0.0005623,
|
| 39456 |
+
"grad_norm": 2.0587210655212402,
|
| 39457 |
+
"learning_rate": 5.622e-05,
|
| 39458 |
+
"loss": 1.2495,
|
| 39459 |
+
"step": 5623
|
| 39460 |
+
},
|
| 39461 |
+
{
|
| 39462 |
+
"epoch": 0.0005624,
|
| 39463 |
+
"grad_norm": 1.9742335081100464,
|
| 39464 |
+
"learning_rate": 5.623e-05,
|
| 39465 |
+
"loss": 1.165,
|
| 39466 |
+
"step": 5624
|
| 39467 |
+
},
|
| 39468 |
+
{
|
| 39469 |
+
"epoch": 0.0005625,
|
| 39470 |
+
"grad_norm": 2.1360883712768555,
|
| 39471 |
+
"learning_rate": 5.624e-05,
|
| 39472 |
+
"loss": 1.1914,
|
| 39473 |
+
"step": 5625
|
| 39474 |
+
},
|
| 39475 |
+
{
|
| 39476 |
+
"epoch": 0.0005626,
|
| 39477 |
+
"grad_norm": 2.0543458461761475,
|
| 39478 |
+
"learning_rate": 5.625e-05,
|
| 39479 |
+
"loss": 1.1826,
|
| 39480 |
+
"step": 5626
|
| 39481 |
+
},
|
| 39482 |
+
{
|
| 39483 |
+
"epoch": 0.0005627,
|
| 39484 |
+
"grad_norm": 2.346017360687256,
|
| 39485 |
+
"learning_rate": 5.626e-05,
|
| 39486 |
+
"loss": 1.3242,
|
| 39487 |
+
"step": 5627
|
| 39488 |
+
},
|
| 39489 |
+
{
|
| 39490 |
+
"epoch": 0.0005628,
|
| 39491 |
+
"grad_norm": 2.727350950241089,
|
| 39492 |
+
"learning_rate": 5.627000000000001e-05,
|
| 39493 |
+
"loss": 1.417,
|
| 39494 |
+
"step": 5628
|
| 39495 |
+
},
|
| 39496 |
+
{
|
| 39497 |
+
"epoch": 0.0005629,
|
| 39498 |
+
"grad_norm": 2.4296886920928955,
|
| 39499 |
+
"learning_rate": 5.6279999999999996e-05,
|
| 39500 |
+
"loss": 1.188,
|
| 39501 |
+
"step": 5629
|
| 39502 |
+
},
|
| 39503 |
+
{
|
| 39504 |
+
"epoch": 0.000563,
|
| 39505 |
+
"grad_norm": 2.0857579708099365,
|
| 39506 |
+
"learning_rate": 5.629e-05,
|
| 39507 |
+
"loss": 1.1699,
|
| 39508 |
+
"step": 5630
|
| 39509 |
+
},
|
| 39510 |
+
{
|
| 39511 |
+
"epoch": 0.0005631,
|
| 39512 |
+
"grad_norm": 2.0088858604431152,
|
| 39513 |
+
"learning_rate": 5.6300000000000006e-05,
|
| 39514 |
+
"loss": 1.2188,
|
| 39515 |
+
"step": 5631
|
| 39516 |
+
},
|
| 39517 |
+
{
|
| 39518 |
+
"epoch": 0.0005632,
|
| 39519 |
+
"grad_norm": 2.1220197677612305,
|
| 39520 |
+
"learning_rate": 5.6309999999999994e-05,
|
| 39521 |
+
"loss": 1.2217,
|
| 39522 |
+
"step": 5632
|
| 39523 |
+
},
|
| 39524 |
+
{
|
| 39525 |
+
"epoch": 0.0005633,
|
| 39526 |
+
"grad_norm": 2.684288740158081,
|
| 39527 |
+
"learning_rate": 5.632e-05,
|
| 39528 |
+
"loss": 1.3774,
|
| 39529 |
+
"step": 5633
|
| 39530 |
+
},
|
| 39531 |
+
{
|
| 39532 |
+
"epoch": 0.0005634,
|
| 39533 |
+
"grad_norm": 2.1442840099334717,
|
| 39534 |
+
"learning_rate": 5.6330000000000004e-05,
|
| 39535 |
+
"loss": 1.2217,
|
| 39536 |
+
"step": 5634
|
| 39537 |
+
},
|
| 39538 |
+
{
|
| 39539 |
+
"epoch": 0.0005635,
|
| 39540 |
+
"grad_norm": 2.0995047092437744,
|
| 39541 |
+
"learning_rate": 5.6340000000000006e-05,
|
| 39542 |
+
"loss": 1.2295,
|
| 39543 |
+
"step": 5635
|
| 39544 |
+
},
|
| 39545 |
+
{
|
| 39546 |
+
"epoch": 0.0005636,
|
| 39547 |
+
"grad_norm": 2.410370111465454,
|
| 39548 |
+
"learning_rate": 5.635e-05,
|
| 39549 |
+
"loss": 1.2529,
|
| 39550 |
+
"step": 5636
|
| 39551 |
+
},
|
| 39552 |
+
{
|
| 39553 |
+
"epoch": 0.0005637,
|
| 39554 |
+
"grad_norm": 2.588259220123291,
|
| 39555 |
+
"learning_rate": 5.636e-05,
|
| 39556 |
+
"loss": 1.3447,
|
| 39557 |
+
"step": 5637
|
| 39558 |
+
},
|
| 39559 |
+
{
|
| 39560 |
+
"epoch": 0.0005638,
|
| 39561 |
+
"grad_norm": 2.3008322715759277,
|
| 39562 |
+
"learning_rate": 5.6370000000000004e-05,
|
| 39563 |
+
"loss": 1.2861,
|
| 39564 |
+
"step": 5638
|
| 39565 |
+
},
|
| 39566 |
+
{
|
| 39567 |
+
"epoch": 0.0005639,
|
| 39568 |
+
"grad_norm": 2.134218215942383,
|
| 39569 |
+
"learning_rate": 5.638e-05,
|
| 39570 |
+
"loss": 1.1191,
|
| 39571 |
+
"step": 5639
|
| 39572 |
+
},
|
| 39573 |
+
{
|
| 39574 |
+
"epoch": 0.000564,
|
| 39575 |
+
"grad_norm": 2.9954631328582764,
|
| 39576 |
+
"learning_rate": 5.639e-05,
|
| 39577 |
+
"loss": 1.3828,
|
| 39578 |
+
"step": 5640
|
| 39579 |
+
},
|
| 39580 |
+
{
|
| 39581 |
+
"epoch": 0.0005641,
|
| 39582 |
+
"grad_norm": 2.734889030456543,
|
| 39583 |
+
"learning_rate": 5.64e-05,
|
| 39584 |
+
"loss": 1.6357,
|
| 39585 |
+
"step": 5641
|
| 39586 |
+
},
|
| 39587 |
+
{
|
| 39588 |
+
"epoch": 0.0005642,
|
| 39589 |
+
"grad_norm": 2.154879093170166,
|
| 39590 |
+
"learning_rate": 5.641e-05,
|
| 39591 |
+
"loss": 1.2168,
|
| 39592 |
+
"step": 5642
|
| 39593 |
+
},
|
| 39594 |
+
{
|
| 39595 |
+
"epoch": 0.0005643,
|
| 39596 |
+
"grad_norm": 2.6774098873138428,
|
| 39597 |
+
"learning_rate": 5.642e-05,
|
| 39598 |
+
"loss": 1.5977,
|
| 39599 |
+
"step": 5643
|
| 39600 |
+
},
|
| 39601 |
+
{
|
| 39602 |
+
"epoch": 0.0005644,
|
| 39603 |
+
"grad_norm": 2.5120511054992676,
|
| 39604 |
+
"learning_rate": 5.643e-05,
|
| 39605 |
+
"loss": 1.3242,
|
| 39606 |
+
"step": 5644
|
| 39607 |
+
},
|
| 39608 |
+
{
|
| 39609 |
+
"epoch": 0.0005645,
|
| 39610 |
+
"grad_norm": 2.580930233001709,
|
| 39611 |
+
"learning_rate": 5.644000000000001e-05,
|
| 39612 |
+
"loss": 1.4297,
|
| 39613 |
+
"step": 5645
|
| 39614 |
+
},
|
| 39615 |
+
{
|
| 39616 |
+
"epoch": 0.0005646,
|
| 39617 |
+
"grad_norm": 2.058643341064453,
|
| 39618 |
+
"learning_rate": 5.6449999999999997e-05,
|
| 39619 |
+
"loss": 1.1025,
|
| 39620 |
+
"step": 5646
|
| 39621 |
+
},
|
| 39622 |
+
{
|
| 39623 |
+
"epoch": 0.0005647,
|
| 39624 |
+
"grad_norm": 1.924004077911377,
|
| 39625 |
+
"learning_rate": 5.646e-05,
|
| 39626 |
+
"loss": 1.1406,
|
| 39627 |
+
"step": 5647
|
| 39628 |
+
},
|
| 39629 |
+
{
|
| 39630 |
+
"epoch": 0.0005648,
|
| 39631 |
+
"grad_norm": 2.348186492919922,
|
| 39632 |
+
"learning_rate": 5.6470000000000007e-05,
|
| 39633 |
+
"loss": 1.2178,
|
| 39634 |
+
"step": 5648
|
| 39635 |
+
},
|
| 39636 |
+
{
|
| 39637 |
+
"epoch": 0.0005649,
|
| 39638 |
+
"grad_norm": 2.400103807449341,
|
| 39639 |
+
"learning_rate": 5.6479999999999995e-05,
|
| 39640 |
+
"loss": 1.333,
|
| 39641 |
+
"step": 5649
|
| 39642 |
+
},
|
| 39643 |
+
{
|
| 39644 |
+
"epoch": 0.000565,
|
| 39645 |
+
"grad_norm": 2.0180234909057617,
|
| 39646 |
+
"learning_rate": 5.649e-05,
|
| 39647 |
+
"loss": 1.0439,
|
| 39648 |
+
"step": 5650
|
| 39649 |
+
},
|
| 39650 |
+
{
|
| 39651 |
+
"epoch": 0.0005651,
|
| 39652 |
+
"grad_norm": 2.074004650115967,
|
| 39653 |
+
"learning_rate": 5.6500000000000005e-05,
|
| 39654 |
+
"loss": 1.1084,
|
| 39655 |
+
"step": 5651
|
| 39656 |
+
},
|
| 39657 |
+
{
|
| 39658 |
+
"epoch": 0.0005652,
|
| 39659 |
+
"grad_norm": 2.834411144256592,
|
| 39660 |
+
"learning_rate": 5.6510000000000006e-05,
|
| 39661 |
+
"loss": 1.3525,
|
| 39662 |
+
"step": 5652
|
| 39663 |
+
},
|
| 39664 |
+
{
|
| 39665 |
+
"epoch": 0.0005653,
|
| 39666 |
+
"grad_norm": 2.1253347396850586,
|
| 39667 |
+
"learning_rate": 5.652e-05,
|
| 39668 |
+
"loss": 1.1514,
|
| 39669 |
+
"step": 5653
|
| 39670 |
+
},
|
| 39671 |
+
{
|
| 39672 |
+
"epoch": 0.0005654,
|
| 39673 |
+
"grad_norm": 2.0319814682006836,
|
| 39674 |
+
"learning_rate": 5.653e-05,
|
| 39675 |
+
"loss": 1.1621,
|
| 39676 |
+
"step": 5654
|
| 39677 |
+
},
|
| 39678 |
+
{
|
| 39679 |
+
"epoch": 0.0005655,
|
| 39680 |
+
"grad_norm": 1.959982991218567,
|
| 39681 |
+
"learning_rate": 5.6540000000000004e-05,
|
| 39682 |
+
"loss": 1.1929,
|
| 39683 |
+
"step": 5655
|
| 39684 |
+
},
|
| 39685 |
+
{
|
| 39686 |
+
"epoch": 0.0005656,
|
| 39687 |
+
"grad_norm": 2.5499229431152344,
|
| 39688 |
+
"learning_rate": 5.655e-05,
|
| 39689 |
+
"loss": 1.4463,
|
| 39690 |
+
"step": 5656
|
| 39691 |
+
},
|
| 39692 |
+
{
|
| 39693 |
+
"epoch": 0.0005657,
|
| 39694 |
+
"grad_norm": 2.4173882007598877,
|
| 39695 |
+
"learning_rate": 5.656e-05,
|
| 39696 |
+
"loss": 1.2705,
|
| 39697 |
+
"step": 5657
|
| 39698 |
+
},
|
| 39699 |
+
{
|
| 39700 |
+
"epoch": 0.0005658,
|
| 39701 |
+
"grad_norm": 2.9077694416046143,
|
| 39702 |
+
"learning_rate": 5.657e-05,
|
| 39703 |
+
"loss": 1.418,
|
| 39704 |
+
"step": 5658
|
| 39705 |
+
},
|
| 39706 |
+
{
|
| 39707 |
+
"epoch": 0.0005659,
|
| 39708 |
+
"grad_norm": 2.3227171897888184,
|
| 39709 |
+
"learning_rate": 5.658e-05,
|
| 39710 |
+
"loss": 1.3213,
|
| 39711 |
+
"step": 5659
|
| 39712 |
+
},
|
| 39713 |
+
{
|
| 39714 |
+
"epoch": 0.000566,
|
| 39715 |
+
"grad_norm": 2.192533016204834,
|
| 39716 |
+
"learning_rate": 5.659e-05,
|
| 39717 |
+
"loss": 1.1631,
|
| 39718 |
+
"step": 5660
|
| 39719 |
+
},
|
| 39720 |
+
{
|
| 39721 |
+
"epoch": 0.0005661,
|
| 39722 |
+
"grad_norm": 2.154204845428467,
|
| 39723 |
+
"learning_rate": 5.66e-05,
|
| 39724 |
+
"loss": 1.2314,
|
| 39725 |
+
"step": 5661
|
| 39726 |
+
},
|
| 39727 |
+
{
|
| 39728 |
+
"epoch": 0.0005662,
|
| 39729 |
+
"grad_norm": 2.6472861766815186,
|
| 39730 |
+
"learning_rate": 5.661000000000001e-05,
|
| 39731 |
+
"loss": 1.4326,
|
| 39732 |
+
"step": 5662
|
| 39733 |
+
},
|
| 39734 |
+
{
|
| 39735 |
+
"epoch": 0.0005663,
|
| 39736 |
+
"grad_norm": 2.488671064376831,
|
| 39737 |
+
"learning_rate": 5.662e-05,
|
| 39738 |
+
"loss": 1.2031,
|
| 39739 |
+
"step": 5663
|
| 39740 |
+
},
|
| 39741 |
+
{
|
| 39742 |
+
"epoch": 0.0005664,
|
| 39743 |
+
"grad_norm": 2.7378642559051514,
|
| 39744 |
+
"learning_rate": 5.663e-05,
|
| 39745 |
+
"loss": 1.4014,
|
| 39746 |
+
"step": 5664
|
| 39747 |
+
},
|
| 39748 |
+
{
|
| 39749 |
+
"epoch": 0.0005665,
|
| 39750 |
+
"grad_norm": 2.089963436126709,
|
| 39751 |
+
"learning_rate": 5.6640000000000007e-05,
|
| 39752 |
+
"loss": 1.1357,
|
| 39753 |
+
"step": 5665
|
| 39754 |
+
},
|
| 39755 |
+
{
|
| 39756 |
+
"epoch": 0.0005666,
|
| 39757 |
+
"grad_norm": 2.295555353164673,
|
| 39758 |
+
"learning_rate": 5.6649999999999995e-05,
|
| 39759 |
+
"loss": 1.2432,
|
| 39760 |
+
"step": 5666
|
| 39761 |
+
},
|
| 39762 |
+
{
|
| 39763 |
+
"epoch": 0.0005667,
|
| 39764 |
+
"grad_norm": 3.187014102935791,
|
| 39765 |
+
"learning_rate": 5.666e-05,
|
| 39766 |
+
"loss": 1.2603,
|
| 39767 |
+
"step": 5667
|
| 39768 |
+
},
|
| 39769 |
+
{
|
| 39770 |
+
"epoch": 0.0005668,
|
| 39771 |
+
"grad_norm": 2.1170108318328857,
|
| 39772 |
+
"learning_rate": 5.6670000000000005e-05,
|
| 39773 |
+
"loss": 1.125,
|
| 39774 |
+
"step": 5668
|
| 39775 |
+
},
|
| 39776 |
+
{
|
| 39777 |
+
"epoch": 0.0005669,
|
| 39778 |
+
"grad_norm": 2.15708327293396,
|
| 39779 |
+
"learning_rate": 5.6680000000000006e-05,
|
| 39780 |
+
"loss": 1.1699,
|
| 39781 |
+
"step": 5669
|
| 39782 |
+
},
|
| 39783 |
+
{
|
| 39784 |
+
"epoch": 0.000567,
|
| 39785 |
+
"grad_norm": 1.9517823457717896,
|
| 39786 |
+
"learning_rate": 5.669e-05,
|
| 39787 |
+
"loss": 1.1523,
|
| 39788 |
+
"step": 5670
|
| 39789 |
+
},
|
| 39790 |
+
{
|
| 39791 |
+
"epoch": 0.0005671,
|
| 39792 |
+
"grad_norm": 2.6591954231262207,
|
| 39793 |
+
"learning_rate": 5.67e-05,
|
| 39794 |
+
"loss": 1.2305,
|
| 39795 |
+
"step": 5671
|
| 39796 |
+
},
|
| 39797 |
+
{
|
| 39798 |
+
"epoch": 0.0005672,
|
| 39799 |
+
"grad_norm": 1.9791878461837769,
|
| 39800 |
+
"learning_rate": 5.6710000000000004e-05,
|
| 39801 |
+
"loss": 1.0776,
|
| 39802 |
+
"step": 5672
|
| 39803 |
+
},
|
| 39804 |
+
{
|
| 39805 |
+
"epoch": 0.0005673,
|
| 39806 |
+
"grad_norm": 2.0013673305511475,
|
| 39807 |
+
"learning_rate": 5.672e-05,
|
| 39808 |
+
"loss": 1.1182,
|
| 39809 |
+
"step": 5673
|
| 39810 |
+
},
|
| 39811 |
+
{
|
| 39812 |
+
"epoch": 0.0005674,
|
| 39813 |
+
"grad_norm": 2.209170341491699,
|
| 39814 |
+
"learning_rate": 5.673e-05,
|
| 39815 |
+
"loss": 1.2578,
|
| 39816 |
+
"step": 5674
|
| 39817 |
+
},
|
| 39818 |
+
{
|
| 39819 |
+
"epoch": 0.0005675,
|
| 39820 |
+
"grad_norm": 2.586726665496826,
|
| 39821 |
+
"learning_rate": 5.674e-05,
|
| 39822 |
+
"loss": 1.3379,
|
| 39823 |
+
"step": 5675
|
| 39824 |
+
},
|
| 39825 |
+
{
|
| 39826 |
+
"epoch": 0.0005676,
|
| 39827 |
+
"grad_norm": 1.9457072019577026,
|
| 39828 |
+
"learning_rate": 5.675e-05,
|
| 39829 |
+
"loss": 1.0288,
|
| 39830 |
+
"step": 5676
|
| 39831 |
+
},
|
| 39832 |
+
{
|
| 39833 |
+
"epoch": 0.0005677,
|
| 39834 |
+
"grad_norm": 2.2997748851776123,
|
| 39835 |
+
"learning_rate": 5.676e-05,
|
| 39836 |
+
"loss": 1.2896,
|
| 39837 |
+
"step": 5677
|
| 39838 |
+
},
|
| 39839 |
+
{
|
| 39840 |
+
"epoch": 0.0005678,
|
| 39841 |
+
"grad_norm": 1.9310775995254517,
|
| 39842 |
+
"learning_rate": 5.677e-05,
|
| 39843 |
+
"loss": 1.2266,
|
| 39844 |
+
"step": 5678
|
| 39845 |
+
},
|
| 39846 |
+
{
|
| 39847 |
+
"epoch": 0.0005679,
|
| 39848 |
+
"grad_norm": 1.9865697622299194,
|
| 39849 |
+
"learning_rate": 5.678000000000001e-05,
|
| 39850 |
+
"loss": 1.1147,
|
| 39851 |
+
"step": 5679
|
| 39852 |
+
},
|
| 39853 |
+
{
|
| 39854 |
+
"epoch": 0.000568,
|
| 39855 |
+
"grad_norm": 1.958775281906128,
|
| 39856 |
+
"learning_rate": 5.679e-05,
|
| 39857 |
+
"loss": 1.0835,
|
| 39858 |
+
"step": 5680
|
| 39859 |
+
},
|
| 39860 |
+
{
|
| 39861 |
+
"epoch": 0.0005681,
|
| 39862 |
+
"grad_norm": 1.8496625423431396,
|
| 39863 |
+
"learning_rate": 5.68e-05,
|
| 39864 |
+
"loss": 1.0537,
|
| 39865 |
+
"step": 5681
|
| 39866 |
+
},
|
| 39867 |
+
{
|
| 39868 |
+
"epoch": 0.0005682,
|
| 39869 |
+
"grad_norm": 1.922885537147522,
|
| 39870 |
+
"learning_rate": 5.681000000000001e-05,
|
| 39871 |
+
"loss": 1.1465,
|
| 39872 |
+
"step": 5682
|
| 39873 |
+
},
|
| 39874 |
+
{
|
| 39875 |
+
"epoch": 0.0005683,
|
| 39876 |
+
"grad_norm": 1.9769484996795654,
|
| 39877 |
+
"learning_rate": 5.6819999999999995e-05,
|
| 39878 |
+
"loss": 1.0781,
|
| 39879 |
+
"step": 5683
|
| 39880 |
+
},
|
| 39881 |
+
{
|
| 39882 |
+
"epoch": 0.0005684,
|
| 39883 |
+
"grad_norm": 1.9770240783691406,
|
| 39884 |
+
"learning_rate": 5.683e-05,
|
| 39885 |
+
"loss": 1.1118,
|
| 39886 |
+
"step": 5684
|
| 39887 |
+
},
|
| 39888 |
+
{
|
| 39889 |
+
"epoch": 0.0005685,
|
| 39890 |
+
"grad_norm": 1.9034974575042725,
|
| 39891 |
+
"learning_rate": 5.6840000000000005e-05,
|
| 39892 |
+
"loss": 1.0576,
|
| 39893 |
+
"step": 5685
|
| 39894 |
+
},
|
| 39895 |
+
{
|
| 39896 |
+
"epoch": 0.0005686,
|
| 39897 |
+
"grad_norm": 2.0151827335357666,
|
| 39898 |
+
"learning_rate": 5.6850000000000006e-05,
|
| 39899 |
+
"loss": 1.0386,
|
| 39900 |
+
"step": 5686
|
| 39901 |
+
},
|
| 39902 |
+
{
|
| 39903 |
+
"epoch": 0.0005687,
|
| 39904 |
+
"grad_norm": 3.5754876136779785,
|
| 39905 |
+
"learning_rate": 5.686e-05,
|
| 39906 |
+
"loss": 1.6006,
|
| 39907 |
+
"step": 5687
|
| 39908 |
+
},
|
| 39909 |
+
{
|
| 39910 |
+
"epoch": 0.0005688,
|
| 39911 |
+
"grad_norm": 2.2252042293548584,
|
| 39912 |
+
"learning_rate": 5.687e-05,
|
| 39913 |
+
"loss": 1.1045,
|
| 39914 |
+
"step": 5688
|
| 39915 |
+
},
|
| 39916 |
+
{
|
| 39917 |
+
"epoch": 0.0005689,
|
| 39918 |
+
"grad_norm": 2.010793447494507,
|
| 39919 |
+
"learning_rate": 5.6880000000000004e-05,
|
| 39920 |
+
"loss": 1.0869,
|
| 39921 |
+
"step": 5689
|
| 39922 |
+
},
|
| 39923 |
+
{
|
| 39924 |
+
"epoch": 0.000569,
|
| 39925 |
+
"grad_norm": 1.8892912864685059,
|
| 39926 |
+
"learning_rate": 5.689e-05,
|
| 39927 |
+
"loss": 1.1279,
|
| 39928 |
+
"step": 5690
|
| 39929 |
+
},
|
| 39930 |
+
{
|
| 39931 |
+
"epoch": 0.0005691,
|
| 39932 |
+
"grad_norm": 1.9598063230514526,
|
| 39933 |
+
"learning_rate": 5.69e-05,
|
| 39934 |
+
"loss": 1.0938,
|
| 39935 |
+
"step": 5691
|
| 39936 |
+
},
|
| 39937 |
+
{
|
| 39938 |
+
"epoch": 0.0005692,
|
| 39939 |
+
"grad_norm": 1.9675894975662231,
|
| 39940 |
+
"learning_rate": 5.691e-05,
|
| 39941 |
+
"loss": 1.0811,
|
| 39942 |
+
"step": 5692
|
| 39943 |
+
},
|
| 39944 |
+
{
|
| 39945 |
+
"epoch": 0.0005693,
|
| 39946 |
+
"grad_norm": 1.9908654689788818,
|
| 39947 |
+
"learning_rate": 5.692e-05,
|
| 39948 |
+
"loss": 1.165,
|
| 39949 |
+
"step": 5693
|
| 39950 |
+
},
|
| 39951 |
+
{
|
| 39952 |
+
"epoch": 0.0005694,
|
| 39953 |
+
"grad_norm": 1.8950060606002808,
|
| 39954 |
+
"learning_rate": 5.693e-05,
|
| 39955 |
+
"loss": 1.1211,
|
| 39956 |
+
"step": 5694
|
| 39957 |
+
},
|
| 39958 |
+
{
|
| 39959 |
+
"epoch": 0.0005695,
|
| 39960 |
+
"grad_norm": 2.0623393058776855,
|
| 39961 |
+
"learning_rate": 5.694e-05,
|
| 39962 |
+
"loss": 1.124,
|
| 39963 |
+
"step": 5695
|
| 39964 |
+
},
|
| 39965 |
+
{
|
| 39966 |
+
"epoch": 0.0005696,
|
| 39967 |
+
"grad_norm": 2.491461753845215,
|
| 39968 |
+
"learning_rate": 5.695e-05,
|
| 39969 |
+
"loss": 1.3096,
|
| 39970 |
+
"step": 5696
|
| 39971 |
+
},
|
| 39972 |
+
{
|
| 39973 |
+
"epoch": 0.0005697,
|
| 39974 |
+
"grad_norm": 2.5069475173950195,
|
| 39975 |
+
"learning_rate": 5.696e-05,
|
| 39976 |
+
"loss": 1.3896,
|
| 39977 |
+
"step": 5697
|
| 39978 |
+
},
|
| 39979 |
+
{
|
| 39980 |
+
"epoch": 0.0005698,
|
| 39981 |
+
"grad_norm": 1.8650083541870117,
|
| 39982 |
+
"learning_rate": 5.697e-05,
|
| 39983 |
+
"loss": 1.0679,
|
| 39984 |
+
"step": 5698
|
| 39985 |
+
},
|
| 39986 |
+
{
|
| 39987 |
+
"epoch": 0.0005699,
|
| 39988 |
+
"grad_norm": 1.7204188108444214,
|
| 39989 |
+
"learning_rate": 5.698000000000001e-05,
|
| 39990 |
+
"loss": 1.0107,
|
| 39991 |
+
"step": 5699
|
| 39992 |
+
},
|
| 39993 |
+
{
|
| 39994 |
+
"epoch": 0.00057,
|
| 39995 |
+
"grad_norm": 2.277846336364746,
|
| 39996 |
+
"learning_rate": 5.6989999999999995e-05,
|
| 39997 |
+
"loss": 1.2178,
|
| 39998 |
+
"step": 5700
|
| 39999 |
+
},
|
| 40000 |
+
{
|
| 40001 |
+
"epoch": 0.0005701,
|
| 40002 |
+
"grad_norm": 2.155337333679199,
|
| 40003 |
+
"learning_rate": 5.7e-05,
|
| 40004 |
+
"loss": 1.3115,
|
| 40005 |
+
"step": 5701
|
| 40006 |
+
},
|
| 40007 |
+
{
|
| 40008 |
+
"epoch": 0.0005702,
|
| 40009 |
+
"grad_norm": 2.2528960704803467,
|
| 40010 |
+
"learning_rate": 5.7010000000000005e-05,
|
| 40011 |
+
"loss": 1.2407,
|
| 40012 |
+
"step": 5702
|
| 40013 |
+
},
|
| 40014 |
+
{
|
| 40015 |
+
"epoch": 0.0005703,
|
| 40016 |
+
"grad_norm": 1.8725662231445312,
|
| 40017 |
+
"learning_rate": 5.7020000000000006e-05,
|
| 40018 |
+
"loss": 1.1646,
|
| 40019 |
+
"step": 5703
|
| 40020 |
+
},
|
| 40021 |
+
{
|
| 40022 |
+
"epoch": 0.0005704,
|
| 40023 |
+
"grad_norm": 2.5592408180236816,
|
| 40024 |
+
"learning_rate": 5.703e-05,
|
| 40025 |
+
"loss": 1.3496,
|
| 40026 |
+
"step": 5704
|
| 40027 |
+
},
|
| 40028 |
+
{
|
| 40029 |
+
"epoch": 0.0005705,
|
| 40030 |
+
"grad_norm": 2.1637556552886963,
|
| 40031 |
+
"learning_rate": 5.704e-05,
|
| 40032 |
+
"loss": 1.249,
|
| 40033 |
+
"step": 5705
|
| 40034 |
+
},
|
| 40035 |
+
{
|
| 40036 |
+
"epoch": 0.0005706,
|
| 40037 |
+
"grad_norm": 2.220670461654663,
|
| 40038 |
+
"learning_rate": 5.7050000000000004e-05,
|
| 40039 |
+
"loss": 1.1992,
|
| 40040 |
+
"step": 5706
|
| 40041 |
+
},
|
| 40042 |
+
{
|
| 40043 |
+
"epoch": 0.0005707,
|
| 40044 |
+
"grad_norm": 1.9339892864227295,
|
| 40045 |
+
"learning_rate": 5.706e-05,
|
| 40046 |
+
"loss": 1.1338,
|
| 40047 |
+
"step": 5707
|
| 40048 |
+
},
|
| 40049 |
+
{
|
| 40050 |
+
"epoch": 0.0005708,
|
| 40051 |
+
"grad_norm": 2.1802027225494385,
|
| 40052 |
+
"learning_rate": 5.707e-05,
|
| 40053 |
+
"loss": 1.2393,
|
| 40054 |
+
"step": 5708
|
| 40055 |
+
},
|
| 40056 |
+
{
|
| 40057 |
+
"epoch": 0.0005709,
|
| 40058 |
+
"grad_norm": 1.7978614568710327,
|
| 40059 |
+
"learning_rate": 5.708e-05,
|
| 40060 |
+
"loss": 1.0391,
|
| 40061 |
+
"step": 5709
|
| 40062 |
+
},
|
| 40063 |
+
{
|
| 40064 |
+
"epoch": 0.000571,
|
| 40065 |
+
"grad_norm": 3.548150062561035,
|
| 40066 |
+
"learning_rate": 5.709e-05,
|
| 40067 |
+
"loss": 1.1035,
|
| 40068 |
+
"step": 5710
|
| 40069 |
+
},
|
| 40070 |
+
{
|
| 40071 |
+
"epoch": 0.0005711,
|
| 40072 |
+
"grad_norm": 2.054603099822998,
|
| 40073 |
+
"learning_rate": 5.71e-05,
|
| 40074 |
+
"loss": 1.0298,
|
| 40075 |
+
"step": 5711
|
| 40076 |
+
},
|
| 40077 |
+
{
|
| 40078 |
+
"epoch": 0.0005712,
|
| 40079 |
+
"grad_norm": 2.206544876098633,
|
| 40080 |
+
"learning_rate": 5.711e-05,
|
| 40081 |
+
"loss": 1.2344,
|
| 40082 |
+
"step": 5712
|
| 40083 |
+
},
|
| 40084 |
+
{
|
| 40085 |
+
"epoch": 0.0005713,
|
| 40086 |
+
"grad_norm": 4.416079521179199,
|
| 40087 |
+
"learning_rate": 5.712e-05,
|
| 40088 |
+
"loss": 1.4077,
|
| 40089 |
+
"step": 5713
|
| 40090 |
+
},
|
| 40091 |
+
{
|
| 40092 |
+
"epoch": 0.0005714,
|
| 40093 |
+
"grad_norm": 2.3344037532806396,
|
| 40094 |
+
"learning_rate": 5.713e-05,
|
| 40095 |
+
"loss": 1.0947,
|
| 40096 |
+
"step": 5714
|
| 40097 |
+
},
|
| 40098 |
+
{
|
| 40099 |
+
"epoch": 0.0005715,
|
| 40100 |
+
"grad_norm": 2.2653141021728516,
|
| 40101 |
+
"learning_rate": 5.714e-05,
|
| 40102 |
+
"loss": 1.0908,
|
| 40103 |
+
"step": 5715
|
| 40104 |
+
},
|
| 40105 |
+
{
|
| 40106 |
+
"epoch": 0.0005716,
|
| 40107 |
+
"grad_norm": 2.118607997894287,
|
| 40108 |
+
"learning_rate": 5.715000000000001e-05,
|
| 40109 |
+
"loss": 1.2393,
|
| 40110 |
+
"step": 5716
|
| 40111 |
+
},
|
| 40112 |
+
{
|
| 40113 |
+
"epoch": 0.0005717,
|
| 40114 |
+
"grad_norm": 2.184203863143921,
|
| 40115 |
+
"learning_rate": 5.7159999999999995e-05,
|
| 40116 |
+
"loss": 1.147,
|
| 40117 |
+
"step": 5717
|
| 40118 |
+
},
|
| 40119 |
+
{
|
| 40120 |
+
"epoch": 0.0005718,
|
| 40121 |
+
"grad_norm": 2.034911870956421,
|
| 40122 |
+
"learning_rate": 5.7169999999999996e-05,
|
| 40123 |
+
"loss": 1.1333,
|
| 40124 |
+
"step": 5718
|
| 40125 |
+
},
|
| 40126 |
+
{
|
| 40127 |
+
"epoch": 0.0005719,
|
| 40128 |
+
"grad_norm": 1.8440461158752441,
|
| 40129 |
+
"learning_rate": 5.7180000000000005e-05,
|
| 40130 |
+
"loss": 0.9668,
|
| 40131 |
+
"step": 5719
|
| 40132 |
+
},
|
| 40133 |
+
{
|
| 40134 |
+
"epoch": 0.000572,
|
| 40135 |
+
"grad_norm": 2.0628461837768555,
|
| 40136 |
+
"learning_rate": 5.7190000000000006e-05,
|
| 40137 |
+
"loss": 1.293,
|
| 40138 |
+
"step": 5720
|
| 40139 |
+
},
|
| 40140 |
+
{
|
| 40141 |
+
"epoch": 0.0005721,
|
| 40142 |
+
"grad_norm": 2.522508382797241,
|
| 40143 |
+
"learning_rate": 5.72e-05,
|
| 40144 |
+
"loss": 1.2866,
|
| 40145 |
+
"step": 5721
|
| 40146 |
+
},
|
| 40147 |
+
{
|
| 40148 |
+
"epoch": 0.0005722,
|
| 40149 |
+
"grad_norm": 2.229628324508667,
|
| 40150 |
+
"learning_rate": 5.721e-05,
|
| 40151 |
+
"loss": 1.1309,
|
| 40152 |
+
"step": 5722
|
| 40153 |
+
},
|
| 40154 |
+
{
|
| 40155 |
+
"epoch": 0.0005723,
|
| 40156 |
+
"grad_norm": 5.912186622619629,
|
| 40157 |
+
"learning_rate": 5.7220000000000004e-05,
|
| 40158 |
+
"loss": 1.8047,
|
| 40159 |
+
"step": 5723
|
| 40160 |
+
},
|
| 40161 |
+
{
|
| 40162 |
+
"epoch": 0.0005724,
|
| 40163 |
+
"grad_norm": 3.1335630416870117,
|
| 40164 |
+
"learning_rate": 5.723e-05,
|
| 40165 |
+
"loss": 1.1792,
|
| 40166 |
+
"step": 5724
|
| 40167 |
+
},
|
| 40168 |
+
{
|
| 40169 |
+
"epoch": 0.0005725,
|
| 40170 |
+
"grad_norm": 2.22836971282959,
|
| 40171 |
+
"learning_rate": 5.724e-05,
|
| 40172 |
+
"loss": 1.0972,
|
| 40173 |
+
"step": 5725
|
| 40174 |
+
},
|
| 40175 |
+
{
|
| 40176 |
+
"epoch": 0.0005726,
|
| 40177 |
+
"grad_norm": 2.203935146331787,
|
| 40178 |
+
"learning_rate": 5.725e-05,
|
| 40179 |
+
"loss": 1.186,
|
| 40180 |
+
"step": 5726
|
| 40181 |
+
},
|
| 40182 |
+
{
|
| 40183 |
+
"epoch": 0.0005727,
|
| 40184 |
+
"grad_norm": 1.68068528175354,
|
| 40185 |
+
"learning_rate": 5.726e-05,
|
| 40186 |
+
"loss": 1.0049,
|
| 40187 |
+
"step": 5727
|
| 40188 |
+
},
|
| 40189 |
+
{
|
| 40190 |
+
"epoch": 0.0005728,
|
| 40191 |
+
"grad_norm": 2.190138339996338,
|
| 40192 |
+
"learning_rate": 5.727e-05,
|
| 40193 |
+
"loss": 1.1567,
|
| 40194 |
+
"step": 5728
|
| 40195 |
+
},
|
| 40196 |
+
{
|
| 40197 |
+
"epoch": 0.0005729,
|
| 40198 |
+
"grad_norm": 1.9706114530563354,
|
| 40199 |
+
"learning_rate": 5.728e-05,
|
| 40200 |
+
"loss": 1.0264,
|
| 40201 |
+
"step": 5729
|
| 40202 |
+
},
|
| 40203 |
+
{
|
| 40204 |
+
"epoch": 0.000573,
|
| 40205 |
+
"grad_norm": 1.7482571601867676,
|
| 40206 |
+
"learning_rate": 5.729e-05,
|
| 40207 |
+
"loss": 1.0039,
|
| 40208 |
+
"step": 5730
|
| 40209 |
+
},
|
| 40210 |
+
{
|
| 40211 |
+
"epoch": 0.0005731,
|
| 40212 |
+
"grad_norm": 2.326173782348633,
|
| 40213 |
+
"learning_rate": 5.73e-05,
|
| 40214 |
+
"loss": 1.0903,
|
| 40215 |
+
"step": 5731
|
| 40216 |
+
},
|
| 40217 |
+
{
|
| 40218 |
+
"epoch": 0.0005732,
|
| 40219 |
+
"grad_norm": 1.7929534912109375,
|
| 40220 |
+
"learning_rate": 5.731e-05,
|
| 40221 |
+
"loss": 0.9854,
|
| 40222 |
+
"step": 5732
|
| 40223 |
+
},
|
| 40224 |
+
{
|
| 40225 |
+
"epoch": 0.0005733,
|
| 40226 |
+
"grad_norm": 1.764266848564148,
|
| 40227 |
+
"learning_rate": 5.732000000000001e-05,
|
| 40228 |
+
"loss": 0.9575,
|
| 40229 |
+
"step": 5733
|
| 40230 |
+
},
|
| 40231 |
+
{
|
| 40232 |
+
"epoch": 0.0005734,
|
| 40233 |
+
"grad_norm": 2.5840072631835938,
|
| 40234 |
+
"learning_rate": 5.7329999999999995e-05,
|
| 40235 |
+
"loss": 1.3408,
|
| 40236 |
+
"step": 5734
|
| 40237 |
+
},
|
| 40238 |
+
{
|
| 40239 |
+
"epoch": 0.0005735,
|
| 40240 |
+
"grad_norm": 2.9217374324798584,
|
| 40241 |
+
"learning_rate": 5.7339999999999996e-05,
|
| 40242 |
+
"loss": 1.3711,
|
| 40243 |
+
"step": 5735
|
| 40244 |
+
},
|
| 40245 |
+
{
|
| 40246 |
+
"epoch": 0.0005736,
|
| 40247 |
+
"grad_norm": 3.844555616378784,
|
| 40248 |
+
"learning_rate": 5.7350000000000005e-05,
|
| 40249 |
+
"loss": 2.0879,
|
| 40250 |
+
"step": 5736
|
| 40251 |
+
},
|
| 40252 |
+
{
|
| 40253 |
+
"epoch": 0.0005737,
|
| 40254 |
+
"grad_norm": 2.118861198425293,
|
| 40255 |
+
"learning_rate": 5.7360000000000006e-05,
|
| 40256 |
+
"loss": 1.0342,
|
| 40257 |
+
"step": 5737
|
| 40258 |
+
},
|
| 40259 |
+
{
|
| 40260 |
+
"epoch": 0.0005738,
|
| 40261 |
+
"grad_norm": 3.1677181720733643,
|
| 40262 |
+
"learning_rate": 5.737e-05,
|
| 40263 |
+
"loss": 1.3975,
|
| 40264 |
+
"step": 5738
|
| 40265 |
+
},
|
| 40266 |
+
{
|
| 40267 |
+
"epoch": 0.0005739,
|
| 40268 |
+
"grad_norm": 2.005305051803589,
|
| 40269 |
+
"learning_rate": 5.738e-05,
|
| 40270 |
+
"loss": 1.063,
|
| 40271 |
+
"step": 5739
|
| 40272 |
+
},
|
| 40273 |
+
{
|
| 40274 |
+
"epoch": 0.000574,
|
| 40275 |
+
"grad_norm": 2.0784804821014404,
|
| 40276 |
+
"learning_rate": 5.7390000000000004e-05,
|
| 40277 |
+
"loss": 1.0684,
|
| 40278 |
+
"step": 5740
|
| 40279 |
+
},
|
| 40280 |
+
{
|
| 40281 |
+
"epoch": 0.0005741,
|
| 40282 |
+
"grad_norm": 3.071539878845215,
|
| 40283 |
+
"learning_rate": 5.74e-05,
|
| 40284 |
+
"loss": 1.2676,
|
| 40285 |
+
"step": 5741
|
| 40286 |
+
},
|
| 40287 |
+
{
|
| 40288 |
+
"epoch": 0.0005742,
|
| 40289 |
+
"grad_norm": 2.1717145442962646,
|
| 40290 |
+
"learning_rate": 5.741e-05,
|
| 40291 |
+
"loss": 1.0571,
|
| 40292 |
+
"step": 5742
|
| 40293 |
+
},
|
| 40294 |
+
{
|
| 40295 |
+
"epoch": 0.0005743,
|
| 40296 |
+
"grad_norm": 2.0021402835845947,
|
| 40297 |
+
"learning_rate": 5.742e-05,
|
| 40298 |
+
"loss": 1.0229,
|
| 40299 |
+
"step": 5743
|
| 40300 |
+
},
|
| 40301 |
+
{
|
| 40302 |
+
"epoch": 0.0005744,
|
| 40303 |
+
"grad_norm": 1.7721225023269653,
|
| 40304 |
+
"learning_rate": 5.743e-05,
|
| 40305 |
+
"loss": 0.9575,
|
| 40306 |
+
"step": 5744
|
| 40307 |
+
},
|
| 40308 |
+
{
|
| 40309 |
+
"epoch": 0.0005745,
|
| 40310 |
+
"grad_norm": 2.241121530532837,
|
| 40311 |
+
"learning_rate": 5.744e-05,
|
| 40312 |
+
"loss": 1.2036,
|
| 40313 |
+
"step": 5745
|
| 40314 |
+
},
|
| 40315 |
+
{
|
| 40316 |
+
"epoch": 0.0005746,
|
| 40317 |
+
"grad_norm": 2.1638896465301514,
|
| 40318 |
+
"learning_rate": 5.745e-05,
|
| 40319 |
+
"loss": 1.064,
|
| 40320 |
+
"step": 5746
|
| 40321 |
+
},
|
| 40322 |
+
{
|
| 40323 |
+
"epoch": 0.0005747,
|
| 40324 |
+
"grad_norm": 2.39729642868042,
|
| 40325 |
+
"learning_rate": 5.746e-05,
|
| 40326 |
+
"loss": 1.2822,
|
| 40327 |
+
"step": 5747
|
| 40328 |
+
},
|
| 40329 |
+
{
|
| 40330 |
+
"epoch": 0.0005748,
|
| 40331 |
+
"grad_norm": 2.052424907684326,
|
| 40332 |
+
"learning_rate": 5.747e-05,
|
| 40333 |
+
"loss": 1.0215,
|
| 40334 |
+
"step": 5748
|
| 40335 |
+
},
|
| 40336 |
+
{
|
| 40337 |
+
"epoch": 0.0005749,
|
| 40338 |
+
"grad_norm": 1.981493592262268,
|
| 40339 |
+
"learning_rate": 5.748e-05,
|
| 40340 |
+
"loss": 1.0049,
|
| 40341 |
+
"step": 5749
|
| 40342 |
+
},
|
| 40343 |
+
{
|
| 40344 |
+
"epoch": 0.000575,
|
| 40345 |
+
"grad_norm": 1.9867534637451172,
|
| 40346 |
+
"learning_rate": 5.749000000000001e-05,
|
| 40347 |
+
"loss": 1.0015,
|
| 40348 |
+
"step": 5750
|
| 40349 |
+
},
|
| 40350 |
+
{
|
| 40351 |
+
"epoch": 0.0005751,
|
| 40352 |
+
"grad_norm": 3.0150108337402344,
|
| 40353 |
+
"learning_rate": 5.7499999999999995e-05,
|
| 40354 |
+
"loss": 1.2261,
|
| 40355 |
+
"step": 5751
|
| 40356 |
+
},
|
| 40357 |
+
{
|
| 40358 |
+
"epoch": 0.0005752,
|
| 40359 |
+
"grad_norm": 1.9989426136016846,
|
| 40360 |
+
"learning_rate": 5.7509999999999997e-05,
|
| 40361 |
+
"loss": 0.9468,
|
| 40362 |
+
"step": 5752
|
| 40363 |
+
},
|
| 40364 |
+
{
|
| 40365 |
+
"epoch": 0.0005753,
|
| 40366 |
+
"grad_norm": 1.9769320487976074,
|
| 40367 |
+
"learning_rate": 5.7520000000000005e-05,
|
| 40368 |
+
"loss": 0.9893,
|
| 40369 |
+
"step": 5753
|
| 40370 |
+
},
|
| 40371 |
+
{
|
| 40372 |
+
"epoch": 0.0005754,
|
| 40373 |
+
"grad_norm": 1.8832272291183472,
|
| 40374 |
+
"learning_rate": 5.7530000000000007e-05,
|
| 40375 |
+
"loss": 0.9966,
|
| 40376 |
+
"step": 5754
|
| 40377 |
+
},
|
| 40378 |
+
{
|
| 40379 |
+
"epoch": 0.0005755,
|
| 40380 |
+
"grad_norm": 1.9896249771118164,
|
| 40381 |
+
"learning_rate": 5.754e-05,
|
| 40382 |
+
"loss": 1.0957,
|
| 40383 |
+
"step": 5755
|
| 40384 |
+
},
|
| 40385 |
+
{
|
| 40386 |
+
"epoch": 0.0005756,
|
| 40387 |
+
"grad_norm": 2.0165281295776367,
|
| 40388 |
+
"learning_rate": 5.755e-05,
|
| 40389 |
+
"loss": 1.061,
|
| 40390 |
+
"step": 5756
|
| 40391 |
+
},
|
| 40392 |
+
{
|
| 40393 |
+
"epoch": 0.0005757,
|
| 40394 |
+
"grad_norm": 4.255472660064697,
|
| 40395 |
+
"learning_rate": 5.7560000000000005e-05,
|
| 40396 |
+
"loss": 1.168,
|
| 40397 |
+
"step": 5757
|
| 40398 |
+
},
|
| 40399 |
+
{
|
| 40400 |
+
"epoch": 0.0005758,
|
| 40401 |
+
"grad_norm": 1.8657300472259521,
|
| 40402 |
+
"learning_rate": 5.757e-05,
|
| 40403 |
+
"loss": 0.9268,
|
| 40404 |
+
"step": 5758
|
| 40405 |
+
},
|
| 40406 |
+
{
|
| 40407 |
+
"epoch": 0.0005759,
|
| 40408 |
+
"grad_norm": 1.934298038482666,
|
| 40409 |
+
"learning_rate": 5.758e-05,
|
| 40410 |
+
"loss": 1.0547,
|
| 40411 |
+
"step": 5759
|
| 40412 |
+
},
|
| 40413 |
+
{
|
| 40414 |
+
"epoch": 0.000576,
|
| 40415 |
+
"grad_norm": 1.8616300821304321,
|
| 40416 |
+
"learning_rate": 5.759e-05,
|
| 40417 |
+
"loss": 0.9517,
|
| 40418 |
+
"step": 5760
|
| 40419 |
+
},
|
| 40420 |
+
{
|
| 40421 |
+
"epoch": 0.0005761,
|
| 40422 |
+
"grad_norm": 1.7131308317184448,
|
| 40423 |
+
"learning_rate": 5.76e-05,
|
| 40424 |
+
"loss": 0.9521,
|
| 40425 |
+
"step": 5761
|
| 40426 |
+
},
|
| 40427 |
+
{
|
| 40428 |
+
"epoch": 0.0005762,
|
| 40429 |
+
"grad_norm": 1.90117609500885,
|
| 40430 |
+
"learning_rate": 5.761e-05,
|
| 40431 |
+
"loss": 1.0596,
|
| 40432 |
+
"step": 5762
|
| 40433 |
+
},
|
| 40434 |
+
{
|
| 40435 |
+
"epoch": 0.0005763,
|
| 40436 |
+
"grad_norm": 2.66219425201416,
|
| 40437 |
+
"learning_rate": 5.762e-05,
|
| 40438 |
+
"loss": 1.1758,
|
| 40439 |
+
"step": 5763
|
| 40440 |
+
},
|
| 40441 |
+
{
|
| 40442 |
+
"epoch": 0.0005764,
|
| 40443 |
+
"grad_norm": 2.0648040771484375,
|
| 40444 |
+
"learning_rate": 5.763e-05,
|
| 40445 |
+
"loss": 1.2681,
|
| 40446 |
+
"step": 5764
|
| 40447 |
+
},
|
| 40448 |
+
{
|
| 40449 |
+
"epoch": 0.0005765,
|
| 40450 |
+
"grad_norm": 2.287407636642456,
|
| 40451 |
+
"learning_rate": 5.764e-05,
|
| 40452 |
+
"loss": 1.0923,
|
| 40453 |
+
"step": 5765
|
| 40454 |
+
},
|
| 40455 |
+
{
|
| 40456 |
+
"epoch": 0.0005766,
|
| 40457 |
+
"grad_norm": 2.0532753467559814,
|
| 40458 |
+
"learning_rate": 5.765e-05,
|
| 40459 |
+
"loss": 1.0767,
|
| 40460 |
+
"step": 5766
|
| 40461 |
+
},
|
| 40462 |
+
{
|
| 40463 |
+
"epoch": 0.0005767,
|
| 40464 |
+
"grad_norm": 1.9127421379089355,
|
| 40465 |
+
"learning_rate": 5.766000000000001e-05,
|
| 40466 |
+
"loss": 0.9902,
|
| 40467 |
+
"step": 5767
|
| 40468 |
+
},
|
| 40469 |
+
{
|
| 40470 |
+
"epoch": 0.0005768,
|
| 40471 |
+
"grad_norm": 1.6706316471099854,
|
| 40472 |
+
"learning_rate": 5.7669999999999995e-05,
|
| 40473 |
+
"loss": 0.915,
|
| 40474 |
+
"step": 5768
|
| 40475 |
+
},
|
| 40476 |
+
{
|
| 40477 |
+
"epoch": 0.0005769,
|
| 40478 |
+
"grad_norm": 2.3771395683288574,
|
| 40479 |
+
"learning_rate": 5.768e-05,
|
| 40480 |
+
"loss": 1.0625,
|
| 40481 |
+
"step": 5769
|
| 40482 |
+
},
|
| 40483 |
+
{
|
| 40484 |
+
"epoch": 0.000577,
|
| 40485 |
+
"grad_norm": 1.9100340604782104,
|
| 40486 |
+
"learning_rate": 5.7690000000000005e-05,
|
| 40487 |
+
"loss": 0.9658,
|
| 40488 |
+
"step": 5770
|
| 40489 |
+
},
|
| 40490 |
+
{
|
| 40491 |
+
"epoch": 0.0005771,
|
| 40492 |
+
"grad_norm": 1.868527889251709,
|
| 40493 |
+
"learning_rate": 5.7700000000000007e-05,
|
| 40494 |
+
"loss": 1.0156,
|
| 40495 |
+
"step": 5771
|
| 40496 |
+
},
|
| 40497 |
+
{
|
| 40498 |
+
"epoch": 0.0005772,
|
| 40499 |
+
"grad_norm": 1.7585581541061401,
|
| 40500 |
+
"learning_rate": 5.771e-05,
|
| 40501 |
+
"loss": 0.9268,
|
| 40502 |
+
"step": 5772
|
| 40503 |
+
},
|
| 40504 |
+
{
|
| 40505 |
+
"epoch": 0.0005773,
|
| 40506 |
+
"grad_norm": 1.8959033489227295,
|
| 40507 |
+
"learning_rate": 5.772e-05,
|
| 40508 |
+
"loss": 0.9536,
|
| 40509 |
+
"step": 5773
|
| 40510 |
+
},
|
| 40511 |
+
{
|
| 40512 |
+
"epoch": 0.0005774,
|
| 40513 |
+
"grad_norm": 1.867139458656311,
|
| 40514 |
+
"learning_rate": 5.7730000000000005e-05,
|
| 40515 |
+
"loss": 1.0,
|
| 40516 |
+
"step": 5774
|
| 40517 |
+
},
|
| 40518 |
+
{
|
| 40519 |
+
"epoch": 0.0005775,
|
| 40520 |
+
"grad_norm": 1.6358270645141602,
|
| 40521 |
+
"learning_rate": 5.774e-05,
|
| 40522 |
+
"loss": 0.9214,
|
| 40523 |
+
"step": 5775
|
| 40524 |
+
},
|
| 40525 |
+
{
|
| 40526 |
+
"epoch": 0.0005776,
|
| 40527 |
+
"grad_norm": 2.010411024093628,
|
| 40528 |
+
"learning_rate": 5.775e-05,
|
| 40529 |
+
"loss": 0.9609,
|
| 40530 |
+
"step": 5776
|
| 40531 |
+
},
|
| 40532 |
+
{
|
| 40533 |
+
"epoch": 0.0005777,
|
| 40534 |
+
"grad_norm": 1.5704444646835327,
|
| 40535 |
+
"learning_rate": 5.776e-05,
|
| 40536 |
+
"loss": 0.8735,
|
| 40537 |
+
"step": 5777
|
| 40538 |
+
},
|
| 40539 |
+
{
|
| 40540 |
+
"epoch": 0.0005778,
|
| 40541 |
+
"grad_norm": 1.827335000038147,
|
| 40542 |
+
"learning_rate": 5.777e-05,
|
| 40543 |
+
"loss": 0.9409,
|
| 40544 |
+
"step": 5778
|
| 40545 |
+
},
|
| 40546 |
+
{
|
| 40547 |
+
"epoch": 0.0005779,
|
| 40548 |
+
"grad_norm": 1.7069824934005737,
|
| 40549 |
+
"learning_rate": 5.778e-05,
|
| 40550 |
+
"loss": 0.8984,
|
| 40551 |
+
"step": 5779
|
| 40552 |
+
},
|
| 40553 |
+
{
|
| 40554 |
+
"epoch": 0.000578,
|
| 40555 |
+
"grad_norm": 1.6686686277389526,
|
| 40556 |
+
"learning_rate": 5.779e-05,
|
| 40557 |
+
"loss": 0.8862,
|
| 40558 |
+
"step": 5780
|
| 40559 |
+
},
|
| 40560 |
+
{
|
| 40561 |
+
"epoch": 0.0005781,
|
| 40562 |
+
"grad_norm": 2.480184316635132,
|
| 40563 |
+
"learning_rate": 5.78e-05,
|
| 40564 |
+
"loss": 1.0791,
|
| 40565 |
+
"step": 5781
|
| 40566 |
+
},
|
| 40567 |
+
{
|
| 40568 |
+
"epoch": 0.0005782,
|
| 40569 |
+
"grad_norm": 1.9088640213012695,
|
| 40570 |
+
"learning_rate": 5.781e-05,
|
| 40571 |
+
"loss": 1.0312,
|
| 40572 |
+
"step": 5782
|
| 40573 |
+
},
|
| 40574 |
+
{
|
| 40575 |
+
"epoch": 0.0005783,
|
| 40576 |
+
"grad_norm": 1.7736129760742188,
|
| 40577 |
+
"learning_rate": 5.782e-05,
|
| 40578 |
+
"loss": 0.9204,
|
| 40579 |
+
"step": 5783
|
| 40580 |
+
},
|
| 40581 |
+
{
|
| 40582 |
+
"epoch": 0.0005784,
|
| 40583 |
+
"grad_norm": 1.7554395198822021,
|
| 40584 |
+
"learning_rate": 5.783000000000001e-05,
|
| 40585 |
+
"loss": 0.9634,
|
| 40586 |
+
"step": 5784
|
| 40587 |
+
},
|
| 40588 |
+
{
|
| 40589 |
+
"epoch": 0.0005785,
|
| 40590 |
+
"grad_norm": 2.0152337551116943,
|
| 40591 |
+
"learning_rate": 5.7839999999999995e-05,
|
| 40592 |
+
"loss": 0.874,
|
| 40593 |
+
"step": 5785
|
| 40594 |
+
},
|
| 40595 |
+
{
|
| 40596 |
+
"epoch": 0.0005786,
|
| 40597 |
+
"grad_norm": 1.6878819465637207,
|
| 40598 |
+
"learning_rate": 5.785e-05,
|
| 40599 |
+
"loss": 0.9248,
|
| 40600 |
+
"step": 5786
|
| 40601 |
+
},
|
| 40602 |
+
{
|
| 40603 |
+
"epoch": 0.0005787,
|
| 40604 |
+
"grad_norm": 1.9107030630111694,
|
| 40605 |
+
"learning_rate": 5.7860000000000005e-05,
|
| 40606 |
+
"loss": 0.9609,
|
| 40607 |
+
"step": 5787
|
| 40608 |
+
},
|
| 40609 |
+
{
|
| 40610 |
+
"epoch": 0.0005788,
|
| 40611 |
+
"grad_norm": 1.958961009979248,
|
| 40612 |
+
"learning_rate": 5.787000000000001e-05,
|
| 40613 |
+
"loss": 1.0776,
|
| 40614 |
+
"step": 5788
|
| 40615 |
+
},
|
| 40616 |
+
{
|
| 40617 |
+
"epoch": 0.0005789,
|
| 40618 |
+
"grad_norm": 1.6951417922973633,
|
| 40619 |
+
"learning_rate": 5.788e-05,
|
| 40620 |
+
"loss": 0.8779,
|
| 40621 |
+
"step": 5789
|
| 40622 |
+
},
|
| 40623 |
+
{
|
| 40624 |
+
"epoch": 0.000579,
|
| 40625 |
+
"grad_norm": 2.0156352519989014,
|
| 40626 |
+
"learning_rate": 5.789e-05,
|
| 40627 |
+
"loss": 1.0054,
|
| 40628 |
+
"step": 5790
|
| 40629 |
+
},
|
| 40630 |
+
{
|
| 40631 |
+
"epoch": 0.0005791,
|
| 40632 |
+
"grad_norm": 1.7576463222503662,
|
| 40633 |
+
"learning_rate": 5.7900000000000005e-05,
|
| 40634 |
+
"loss": 0.855,
|
| 40635 |
+
"step": 5791
|
| 40636 |
+
},
|
| 40637 |
+
{
|
| 40638 |
+
"epoch": 0.0005792,
|
| 40639 |
+
"grad_norm": 1.4828929901123047,
|
| 40640 |
+
"learning_rate": 5.791e-05,
|
| 40641 |
+
"loss": 0.8442,
|
| 40642 |
+
"step": 5792
|
| 40643 |
+
},
|
| 40644 |
+
{
|
| 40645 |
+
"epoch": 0.0005793,
|
| 40646 |
+
"grad_norm": 1.69581937789917,
|
| 40647 |
+
"learning_rate": 5.792e-05,
|
| 40648 |
+
"loss": 0.9282,
|
| 40649 |
+
"step": 5793
|
| 40650 |
+
},
|
| 40651 |
+
{
|
| 40652 |
+
"epoch": 0.0005794,
|
| 40653 |
+
"grad_norm": 1.922916054725647,
|
| 40654 |
+
"learning_rate": 5.793e-05,
|
| 40655 |
+
"loss": 0.9702,
|
| 40656 |
+
"step": 5794
|
| 40657 |
+
},
|
| 40658 |
+
{
|
| 40659 |
+
"epoch": 0.0005795,
|
| 40660 |
+
"grad_norm": 2.0185506343841553,
|
| 40661 |
+
"learning_rate": 5.794e-05,
|
| 40662 |
+
"loss": 1.0571,
|
| 40663 |
+
"step": 5795
|
| 40664 |
+
},
|
| 40665 |
+
{
|
| 40666 |
+
"epoch": 0.0005796,
|
| 40667 |
+
"grad_norm": 1.7874513864517212,
|
| 40668 |
+
"learning_rate": 5.795e-05,
|
| 40669 |
+
"loss": 0.9526,
|
| 40670 |
+
"step": 5796
|
| 40671 |
+
},
|
| 40672 |
+
{
|
| 40673 |
+
"epoch": 0.0005797,
|
| 40674 |
+
"grad_norm": 1.6726452112197876,
|
| 40675 |
+
"learning_rate": 5.796e-05,
|
| 40676 |
+
"loss": 0.8789,
|
| 40677 |
+
"step": 5797
|
| 40678 |
+
},
|
| 40679 |
+
{
|
| 40680 |
+
"epoch": 0.0005798,
|
| 40681 |
+
"grad_norm": 1.81720769405365,
|
| 40682 |
+
"learning_rate": 5.797e-05,
|
| 40683 |
+
"loss": 0.9414,
|
| 40684 |
+
"step": 5798
|
| 40685 |
+
},
|
| 40686 |
+
{
|
| 40687 |
+
"epoch": 0.0005799,
|
| 40688 |
+
"grad_norm": 1.6245219707489014,
|
| 40689 |
+
"learning_rate": 5.798e-05,
|
| 40690 |
+
"loss": 0.8921,
|
| 40691 |
+
"step": 5799
|
| 40692 |
+
},
|
| 40693 |
+
{
|
| 40694 |
+
"epoch": 0.00058,
|
| 40695 |
+
"grad_norm": 1.8347564935684204,
|
| 40696 |
+
"learning_rate": 5.799e-05,
|
| 40697 |
+
"loss": 0.8745,
|
| 40698 |
+
"step": 5800
|
| 40699 |
+
},
|
| 40700 |
+
{
|
| 40701 |
+
"epoch": 0.0005801,
|
| 40702 |
+
"grad_norm": 1.843480110168457,
|
| 40703 |
+
"learning_rate": 5.800000000000001e-05,
|
| 40704 |
+
"loss": 0.9004,
|
| 40705 |
+
"step": 5801
|
| 40706 |
+
},
|
| 40707 |
+
{
|
| 40708 |
+
"epoch": 0.0005802,
|
| 40709 |
+
"grad_norm": 1.781453013420105,
|
| 40710 |
+
"learning_rate": 5.8009999999999995e-05,
|
| 40711 |
+
"loss": 0.9199,
|
| 40712 |
+
"step": 5802
|
| 40713 |
+
},
|
| 40714 |
+
{
|
| 40715 |
+
"epoch": 0.0005803,
|
| 40716 |
+
"grad_norm": 1.734071135520935,
|
| 40717 |
+
"learning_rate": 5.802e-05,
|
| 40718 |
+
"loss": 0.9146,
|
| 40719 |
+
"step": 5803
|
| 40720 |
+
},
|
| 40721 |
+
{
|
| 40722 |
+
"epoch": 0.0005804,
|
| 40723 |
+
"grad_norm": 1.772889256477356,
|
| 40724 |
+
"learning_rate": 5.8030000000000005e-05,
|
| 40725 |
+
"loss": 0.8794,
|
| 40726 |
+
"step": 5804
|
| 40727 |
+
},
|
| 40728 |
+
{
|
| 40729 |
+
"epoch": 0.0005805,
|
| 40730 |
+
"grad_norm": 2.4896934032440186,
|
| 40731 |
+
"learning_rate": 5.804000000000001e-05,
|
| 40732 |
+
"loss": 0.9492,
|
| 40733 |
+
"step": 5805
|
| 40734 |
+
},
|
| 40735 |
+
{
|
| 40736 |
+
"epoch": 0.0005806,
|
| 40737 |
+
"grad_norm": 1.6063295602798462,
|
| 40738 |
+
"learning_rate": 5.805e-05,
|
| 40739 |
+
"loss": 0.9087,
|
| 40740 |
+
"step": 5806
|
| 40741 |
+
},
|
| 40742 |
+
{
|
| 40743 |
+
"epoch": 0.0005807,
|
| 40744 |
+
"grad_norm": 1.9447569847106934,
|
| 40745 |
+
"learning_rate": 5.806e-05,
|
| 40746 |
+
"loss": 0.9951,
|
| 40747 |
+
"step": 5807
|
| 40748 |
+
},
|
| 40749 |
+
{
|
| 40750 |
+
"epoch": 0.0005808,
|
| 40751 |
+
"grad_norm": 1.6404564380645752,
|
| 40752 |
+
"learning_rate": 5.8070000000000005e-05,
|
| 40753 |
+
"loss": 0.8506,
|
| 40754 |
+
"step": 5808
|
| 40755 |
+
},
|
| 40756 |
+
{
|
| 40757 |
+
"epoch": 0.0005809,
|
| 40758 |
+
"grad_norm": 1.6724512577056885,
|
| 40759 |
+
"learning_rate": 5.808e-05,
|
| 40760 |
+
"loss": 0.8447,
|
| 40761 |
+
"step": 5809
|
| 40762 |
+
},
|
| 40763 |
+
{
|
| 40764 |
+
"epoch": 0.000581,
|
| 40765 |
+
"grad_norm": 1.8658446073532104,
|
| 40766 |
+
"learning_rate": 5.809e-05,
|
| 40767 |
+
"loss": 0.8677,
|
| 40768 |
+
"step": 5810
|
| 40769 |
+
},
|
| 40770 |
+
{
|
| 40771 |
+
"epoch": 0.0005811,
|
| 40772 |
+
"grad_norm": 1.774506688117981,
|
| 40773 |
+
"learning_rate": 5.81e-05,
|
| 40774 |
+
"loss": 0.8779,
|
| 40775 |
+
"step": 5811
|
| 40776 |
+
},
|
| 40777 |
+
{
|
| 40778 |
+
"epoch": 0.0005812,
|
| 40779 |
+
"grad_norm": 1.6786613464355469,
|
| 40780 |
+
"learning_rate": 5.811e-05,
|
| 40781 |
+
"loss": 0.8633,
|
| 40782 |
+
"step": 5812
|
| 40783 |
+
},
|
| 40784 |
+
{
|
| 40785 |
+
"epoch": 0.0005813,
|
| 40786 |
+
"grad_norm": 1.9051158428192139,
|
| 40787 |
+
"learning_rate": 5.812e-05,
|
| 40788 |
+
"loss": 0.9424,
|
| 40789 |
+
"step": 5813
|
| 40790 |
+
},
|
| 40791 |
+
{
|
| 40792 |
+
"epoch": 0.0005814,
|
| 40793 |
+
"grad_norm": 1.724073052406311,
|
| 40794 |
+
"learning_rate": 5.813e-05,
|
| 40795 |
+
"loss": 0.9126,
|
| 40796 |
+
"step": 5814
|
| 40797 |
+
},
|
| 40798 |
+
{
|
| 40799 |
+
"epoch": 0.0005815,
|
| 40800 |
+
"grad_norm": 1.6745671033859253,
|
| 40801 |
+
"learning_rate": 5.814e-05,
|
| 40802 |
+
"loss": 0.8604,
|
| 40803 |
+
"step": 5815
|
| 40804 |
+
},
|
| 40805 |
+
{
|
| 40806 |
+
"epoch": 0.0005816,
|
| 40807 |
+
"grad_norm": 1.6149710416793823,
|
| 40808 |
+
"learning_rate": 5.815e-05,
|
| 40809 |
+
"loss": 0.8623,
|
| 40810 |
+
"step": 5816
|
| 40811 |
+
},
|
| 40812 |
+
{
|
| 40813 |
+
"epoch": 0.0005817,
|
| 40814 |
+
"grad_norm": 1.640714168548584,
|
| 40815 |
+
"learning_rate": 5.816e-05,
|
| 40816 |
+
"loss": 0.8633,
|
| 40817 |
+
"step": 5817
|
| 40818 |
+
},
|
| 40819 |
+
{
|
| 40820 |
+
"epoch": 0.0005818,
|
| 40821 |
+
"grad_norm": 1.8054769039154053,
|
| 40822 |
+
"learning_rate": 5.817000000000001e-05,
|
| 40823 |
+
"loss": 0.8481,
|
| 40824 |
+
"step": 5818
|
| 40825 |
+
},
|
| 40826 |
+
{
|
| 40827 |
+
"epoch": 0.0005819,
|
| 40828 |
+
"grad_norm": 1.6028062105178833,
|
| 40829 |
+
"learning_rate": 5.8179999999999995e-05,
|
| 40830 |
+
"loss": 0.8555,
|
| 40831 |
+
"step": 5819
|
| 40832 |
+
},
|
| 40833 |
+
{
|
| 40834 |
+
"epoch": 0.000582,
|
| 40835 |
+
"grad_norm": 1.8541170358657837,
|
| 40836 |
+
"learning_rate": 5.819e-05,
|
| 40837 |
+
"loss": 0.9297,
|
| 40838 |
+
"step": 5820
|
| 40839 |
+
},
|
| 40840 |
+
{
|
| 40841 |
+
"epoch": 0.0005821,
|
| 40842 |
+
"grad_norm": 1.7812132835388184,
|
| 40843 |
+
"learning_rate": 5.8200000000000005e-05,
|
| 40844 |
+
"loss": 0.916,
|
| 40845 |
+
"step": 5821
|
| 40846 |
+
},
|
| 40847 |
+
{
|
| 40848 |
+
"epoch": 0.0005822,
|
| 40849 |
+
"grad_norm": 1.6715450286865234,
|
| 40850 |
+
"learning_rate": 5.821000000000001e-05,
|
| 40851 |
+
"loss": 0.876,
|
| 40852 |
+
"step": 5822
|
| 40853 |
+
},
|
| 40854 |
+
{
|
| 40855 |
+
"epoch": 0.0005823,
|
| 40856 |
+
"grad_norm": 1.883423089981079,
|
| 40857 |
+
"learning_rate": 5.822e-05,
|
| 40858 |
+
"loss": 1.061,
|
| 40859 |
+
"step": 5823
|
| 40860 |
+
},
|
| 40861 |
+
{
|
| 40862 |
+
"epoch": 0.0005824,
|
| 40863 |
+
"grad_norm": 1.6956429481506348,
|
| 40864 |
+
"learning_rate": 5.823e-05,
|
| 40865 |
+
"loss": 0.8438,
|
| 40866 |
+
"step": 5824
|
| 40867 |
+
},
|
| 40868 |
+
{
|
| 40869 |
+
"epoch": 0.0005825,
|
| 40870 |
+
"grad_norm": 1.6848008632659912,
|
| 40871 |
+
"learning_rate": 5.8240000000000005e-05,
|
| 40872 |
+
"loss": 0.8887,
|
| 40873 |
+
"step": 5825
|
| 40874 |
+
},
|
| 40875 |
+
{
|
| 40876 |
+
"epoch": 0.0005826,
|
| 40877 |
+
"grad_norm": 1.761123776435852,
|
| 40878 |
+
"learning_rate": 5.825e-05,
|
| 40879 |
+
"loss": 0.9277,
|
| 40880 |
+
"step": 5826
|
| 40881 |
+
},
|
| 40882 |
+
{
|
| 40883 |
+
"epoch": 0.0005827,
|
| 40884 |
+
"grad_norm": 4.005990028381348,
|
| 40885 |
+
"learning_rate": 5.826e-05,
|
| 40886 |
+
"loss": 1.5615,
|
| 40887 |
+
"step": 5827
|
| 40888 |
+
},
|
| 40889 |
+
{
|
| 40890 |
+
"epoch": 0.0005828,
|
| 40891 |
+
"grad_norm": 2.3055953979492188,
|
| 40892 |
+
"learning_rate": 5.827e-05,
|
| 40893 |
+
"loss": 0.9722,
|
| 40894 |
+
"step": 5828
|
| 40895 |
+
},
|
| 40896 |
+
{
|
| 40897 |
+
"epoch": 0.0005829,
|
| 40898 |
+
"grad_norm": 3.2178542613983154,
|
| 40899 |
+
"learning_rate": 5.828e-05,
|
| 40900 |
+
"loss": 1.0449,
|
| 40901 |
+
"step": 5829
|
| 40902 |
+
},
|
| 40903 |
+
{
|
| 40904 |
+
"epoch": 0.000583,
|
| 40905 |
+
"grad_norm": 2.7492799758911133,
|
| 40906 |
+
"learning_rate": 5.829e-05,
|
| 40907 |
+
"loss": 0.9448,
|
| 40908 |
+
"step": 5830
|
| 40909 |
+
},
|
| 40910 |
+
{
|
| 40911 |
+
"epoch": 0.0005831,
|
| 40912 |
+
"grad_norm": 1.996016502380371,
|
| 40913 |
+
"learning_rate": 5.83e-05,
|
| 40914 |
+
"loss": 0.8779,
|
| 40915 |
+
"step": 5831
|
| 40916 |
+
},
|
| 40917 |
+
{
|
| 40918 |
+
"epoch": 0.0005832,
|
| 40919 |
+
"grad_norm": 1.6616058349609375,
|
| 40920 |
+
"learning_rate": 5.831e-05,
|
| 40921 |
+
"loss": 0.7661,
|
| 40922 |
+
"step": 5832
|
| 40923 |
+
},
|
| 40924 |
+
{
|
| 40925 |
+
"epoch": 0.0005833,
|
| 40926 |
+
"grad_norm": 1.789231300354004,
|
| 40927 |
+
"learning_rate": 5.832e-05,
|
| 40928 |
+
"loss": 0.835,
|
| 40929 |
+
"step": 5833
|
| 40930 |
+
},
|
| 40931 |
+
{
|
| 40932 |
+
"epoch": 0.0005834,
|
| 40933 |
+
"grad_norm": 1.9673794507980347,
|
| 40934 |
+
"learning_rate": 5.833e-05,
|
| 40935 |
+
"loss": 0.8794,
|
| 40936 |
+
"step": 5834
|
| 40937 |
+
},
|
| 40938 |
+
{
|
| 40939 |
+
"epoch": 0.0005835,
|
| 40940 |
+
"grad_norm": 1.7308722734451294,
|
| 40941 |
+
"learning_rate": 5.834000000000001e-05,
|
| 40942 |
+
"loss": 0.8584,
|
| 40943 |
+
"step": 5835
|
| 40944 |
+
},
|
| 40945 |
+
{
|
| 40946 |
+
"epoch": 0.0005836,
|
| 40947 |
+
"grad_norm": 1.6586679220199585,
|
| 40948 |
+
"learning_rate": 5.8349999999999995e-05,
|
| 40949 |
+
"loss": 0.8403,
|
| 40950 |
+
"step": 5836
|
| 40951 |
+
},
|
| 40952 |
+
{
|
| 40953 |
+
"epoch": 0.0005837,
|
| 40954 |
+
"grad_norm": 1.7169954776763916,
|
| 40955 |
+
"learning_rate": 5.836e-05,
|
| 40956 |
+
"loss": 0.8477,
|
| 40957 |
+
"step": 5837
|
| 40958 |
+
},
|
| 40959 |
+
{
|
| 40960 |
+
"epoch": 0.0005838,
|
| 40961 |
+
"grad_norm": 1.6661771535873413,
|
| 40962 |
+
"learning_rate": 5.8370000000000005e-05,
|
| 40963 |
+
"loss": 0.8574,
|
| 40964 |
+
"step": 5838
|
| 40965 |
+
},
|
| 40966 |
+
{
|
| 40967 |
+
"epoch": 0.0005839,
|
| 40968 |
+
"grad_norm": 1.6541742086410522,
|
| 40969 |
+
"learning_rate": 5.838000000000001e-05,
|
| 40970 |
+
"loss": 0.7949,
|
| 40971 |
+
"step": 5839
|
| 40972 |
+
},
|
| 40973 |
+
{
|
| 40974 |
+
"epoch": 0.000584,
|
| 40975 |
+
"grad_norm": 1.5927313566207886,
|
| 40976 |
+
"learning_rate": 5.839e-05,
|
| 40977 |
+
"loss": 0.7852,
|
| 40978 |
+
"step": 5840
|
| 40979 |
+
},
|
| 40980 |
+
{
|
| 40981 |
+
"epoch": 0.0005841,
|
| 40982 |
+
"grad_norm": 1.6425414085388184,
|
| 40983 |
+
"learning_rate": 5.84e-05,
|
| 40984 |
+
"loss": 0.8096,
|
| 40985 |
+
"step": 5841
|
| 40986 |
+
},
|
| 40987 |
+
{
|
| 40988 |
+
"epoch": 0.0005842,
|
| 40989 |
+
"grad_norm": 1.7989174127578735,
|
| 40990 |
+
"learning_rate": 5.8410000000000005e-05,
|
| 40991 |
+
"loss": 0.8882,
|
| 40992 |
+
"step": 5842
|
| 40993 |
+
},
|
| 40994 |
+
{
|
| 40995 |
+
"epoch": 0.0005843,
|
| 40996 |
+
"grad_norm": 1.8322278261184692,
|
| 40997 |
+
"learning_rate": 5.842e-05,
|
| 40998 |
+
"loss": 1.001,
|
| 40999 |
+
"step": 5843
|
| 41000 |
+
},
|
| 41001 |
+
{
|
| 41002 |
+
"epoch": 0.0005844,
|
| 41003 |
+
"grad_norm": 1.8731627464294434,
|
| 41004 |
+
"learning_rate": 5.843e-05,
|
| 41005 |
+
"loss": 0.8843,
|
| 41006 |
+
"step": 5844
|
| 41007 |
+
},
|
| 41008 |
+
{
|
| 41009 |
+
"epoch": 0.0005845,
|
| 41010 |
+
"grad_norm": 1.5437366962432861,
|
| 41011 |
+
"learning_rate": 5.844e-05,
|
| 41012 |
+
"loss": 0.7725,
|
| 41013 |
+
"step": 5845
|
| 41014 |
+
},
|
| 41015 |
+
{
|
| 41016 |
+
"epoch": 0.0005846,
|
| 41017 |
+
"grad_norm": 2.8317415714263916,
|
| 41018 |
+
"learning_rate": 5.845e-05,
|
| 41019 |
+
"loss": 1.0942,
|
| 41020 |
+
"step": 5846
|
| 41021 |
+
},
|
| 41022 |
+
{
|
| 41023 |
+
"epoch": 0.0005847,
|
| 41024 |
+
"grad_norm": 1.7654151916503906,
|
| 41025 |
+
"learning_rate": 5.846e-05,
|
| 41026 |
+
"loss": 0.8237,
|
| 41027 |
+
"step": 5847
|
| 41028 |
+
},
|
| 41029 |
+
{
|
| 41030 |
+
"epoch": 0.0005848,
|
| 41031 |
+
"grad_norm": 1.8651440143585205,
|
| 41032 |
+
"learning_rate": 5.847e-05,
|
| 41033 |
+
"loss": 0.812,
|
| 41034 |
+
"step": 5848
|
| 41035 |
+
},
|
| 41036 |
+
{
|
| 41037 |
+
"epoch": 0.0005849,
|
| 41038 |
+
"grad_norm": 1.6400736570358276,
|
| 41039 |
+
"learning_rate": 5.848e-05,
|
| 41040 |
+
"loss": 0.7842,
|
| 41041 |
+
"step": 5849
|
| 41042 |
+
},
|
| 41043 |
+
{
|
| 41044 |
+
"epoch": 0.000585,
|
| 41045 |
+
"grad_norm": 1.6788883209228516,
|
| 41046 |
+
"learning_rate": 5.849e-05,
|
| 41047 |
+
"loss": 0.8096,
|
| 41048 |
+
"step": 5850
|
| 41049 |
+
},
|
| 41050 |
+
{
|
| 41051 |
+
"epoch": 0.0005851,
|
| 41052 |
+
"grad_norm": 2.2326557636260986,
|
| 41053 |
+
"learning_rate": 5.85e-05,
|
| 41054 |
+
"loss": 0.96,
|
| 41055 |
+
"step": 5851
|
| 41056 |
+
},
|
| 41057 |
+
{
|
| 41058 |
+
"epoch": 0.0005852,
|
| 41059 |
+
"grad_norm": 1.8667100667953491,
|
| 41060 |
+
"learning_rate": 5.851000000000001e-05,
|
| 41061 |
+
"loss": 0.8208,
|
| 41062 |
+
"step": 5852
|
| 41063 |
+
},
|
| 41064 |
+
{
|
| 41065 |
+
"epoch": 0.0005853,
|
| 41066 |
+
"grad_norm": 1.6106916666030884,
|
| 41067 |
+
"learning_rate": 5.8519999999999995e-05,
|
| 41068 |
+
"loss": 0.7939,
|
| 41069 |
+
"step": 5853
|
| 41070 |
+
},
|
| 41071 |
+
{
|
| 41072 |
+
"epoch": 0.0005854,
|
| 41073 |
+
"grad_norm": 1.6850008964538574,
|
| 41074 |
+
"learning_rate": 5.853e-05,
|
| 41075 |
+
"loss": 0.8037,
|
| 41076 |
+
"step": 5854
|
| 41077 |
+
},
|
| 41078 |
+
{
|
| 41079 |
+
"epoch": 0.0005855,
|
| 41080 |
+
"grad_norm": 1.6820380687713623,
|
| 41081 |
+
"learning_rate": 5.8540000000000005e-05,
|
| 41082 |
+
"loss": 0.7905,
|
| 41083 |
+
"step": 5855
|
| 41084 |
+
},
|
| 41085 |
+
{
|
| 41086 |
+
"epoch": 0.0005856,
|
| 41087 |
+
"grad_norm": 1.6166424751281738,
|
| 41088 |
+
"learning_rate": 5.855000000000001e-05,
|
| 41089 |
+
"loss": 0.7661,
|
| 41090 |
+
"step": 5856
|
| 41091 |
+
},
|
| 41092 |
+
{
|
| 41093 |
+
"epoch": 0.0005857,
|
| 41094 |
+
"grad_norm": 1.606998085975647,
|
| 41095 |
+
"learning_rate": 5.856e-05,
|
| 41096 |
+
"loss": 0.8359,
|
| 41097 |
+
"step": 5857
|
| 41098 |
+
},
|
| 41099 |
+
{
|
| 41100 |
+
"epoch": 0.0005858,
|
| 41101 |
+
"grad_norm": 1.6939175128936768,
|
| 41102 |
+
"learning_rate": 5.857e-05,
|
| 41103 |
+
"loss": 0.8188,
|
| 41104 |
+
"step": 5858
|
| 41105 |
+
},
|
| 41106 |
+
{
|
| 41107 |
+
"epoch": 0.0005859,
|
| 41108 |
+
"grad_norm": 1.7543524503707886,
|
| 41109 |
+
"learning_rate": 5.8580000000000005e-05,
|
| 41110 |
+
"loss": 0.8491,
|
| 41111 |
+
"step": 5859
|
| 41112 |
+
},
|
| 41113 |
+
{
|
| 41114 |
+
"epoch": 0.000586,
|
| 41115 |
+
"grad_norm": 1.7315897941589355,
|
| 41116 |
+
"learning_rate": 5.859e-05,
|
| 41117 |
+
"loss": 0.8999,
|
| 41118 |
+
"step": 5860
|
| 41119 |
+
},
|
| 41120 |
+
{
|
| 41121 |
+
"epoch": 0.0005861,
|
| 41122 |
+
"grad_norm": 2.3954813480377197,
|
| 41123 |
+
"learning_rate": 5.86e-05,
|
| 41124 |
+
"loss": 1.1616,
|
| 41125 |
+
"step": 5861
|
| 41126 |
+
},
|
| 41127 |
+
{
|
| 41128 |
+
"epoch": 0.0005862,
|
| 41129 |
+
"grad_norm": 1.6844712495803833,
|
| 41130 |
+
"learning_rate": 5.861e-05,
|
| 41131 |
+
"loss": 0.772,
|
| 41132 |
+
"step": 5862
|
| 41133 |
+
},
|
| 41134 |
+
{
|
| 41135 |
+
"epoch": 0.0005863,
|
| 41136 |
+
"grad_norm": 1.5823957920074463,
|
| 41137 |
+
"learning_rate": 5.862e-05,
|
| 41138 |
+
"loss": 0.8071,
|
| 41139 |
+
"step": 5863
|
| 41140 |
+
},
|
| 41141 |
+
{
|
| 41142 |
+
"epoch": 0.0005864,
|
| 41143 |
+
"grad_norm": 1.5627663135528564,
|
| 41144 |
+
"learning_rate": 5.863e-05,
|
| 41145 |
+
"loss": 0.812,
|
| 41146 |
+
"step": 5864
|
| 41147 |
+
},
|
| 41148 |
+
{
|
| 41149 |
+
"epoch": 0.0005865,
|
| 41150 |
+
"grad_norm": 1.6954442262649536,
|
| 41151 |
+
"learning_rate": 5.864e-05,
|
| 41152 |
+
"loss": 0.7988,
|
| 41153 |
+
"step": 5865
|
| 41154 |
+
},
|
| 41155 |
+
{
|
| 41156 |
+
"epoch": 0.0005866,
|
| 41157 |
+
"grad_norm": 1.7571743726730347,
|
| 41158 |
+
"learning_rate": 5.865e-05,
|
| 41159 |
+
"loss": 0.7607,
|
| 41160 |
+
"step": 5866
|
| 41161 |
+
},
|
| 41162 |
+
{
|
| 41163 |
+
"epoch": 0.0005867,
|
| 41164 |
+
"grad_norm": 1.6370595693588257,
|
| 41165 |
+
"learning_rate": 5.866e-05,
|
| 41166 |
+
"loss": 0.7529,
|
| 41167 |
+
"step": 5867
|
| 41168 |
+
},
|
| 41169 |
+
{
|
| 41170 |
+
"epoch": 0.0005868,
|
| 41171 |
+
"grad_norm": 1.5386178493499756,
|
| 41172 |
+
"learning_rate": 5.867e-05,
|
| 41173 |
+
"loss": 0.8086,
|
| 41174 |
+
"step": 5868
|
| 41175 |
+
},
|
| 41176 |
+
{
|
| 41177 |
+
"epoch": 0.0005869,
|
| 41178 |
+
"grad_norm": 1.5495586395263672,
|
| 41179 |
+
"learning_rate": 5.868000000000001e-05,
|
| 41180 |
+
"loss": 0.7627,
|
| 41181 |
+
"step": 5869
|
| 41182 |
+
},
|
| 41183 |
+
{
|
| 41184 |
+
"epoch": 0.000587,
|
| 41185 |
+
"grad_norm": 1.72905695438385,
|
| 41186 |
+
"learning_rate": 5.8689999999999995e-05,
|
| 41187 |
+
"loss": 0.8296,
|
| 41188 |
+
"step": 5870
|
| 41189 |
+
},
|
| 41190 |
+
{
|
| 41191 |
+
"epoch": 0.0005871,
|
| 41192 |
+
"grad_norm": 1.6074451208114624,
|
| 41193 |
+
"learning_rate": 5.87e-05,
|
| 41194 |
+
"loss": 0.876,
|
| 41195 |
+
"step": 5871
|
| 41196 |
+
},
|
| 41197 |
+
{
|
| 41198 |
+
"epoch": 0.0005872,
|
| 41199 |
+
"grad_norm": 1.5799434185028076,
|
| 41200 |
+
"learning_rate": 5.8710000000000005e-05,
|
| 41201 |
+
"loss": 0.8086,
|
| 41202 |
+
"step": 5872
|
| 41203 |
+
},
|
| 41204 |
+
{
|
| 41205 |
+
"epoch": 0.0005873,
|
| 41206 |
+
"grad_norm": 1.7185566425323486,
|
| 41207 |
+
"learning_rate": 5.872000000000001e-05,
|
| 41208 |
+
"loss": 0.873,
|
| 41209 |
+
"step": 5873
|
| 41210 |
+
},
|
| 41211 |
+
{
|
| 41212 |
+
"epoch": 0.0005874,
|
| 41213 |
+
"grad_norm": 1.6175681352615356,
|
| 41214 |
+
"learning_rate": 5.873e-05,
|
| 41215 |
+
"loss": 0.7603,
|
| 41216 |
+
"step": 5874
|
| 41217 |
+
},
|
| 41218 |
+
{
|
| 41219 |
+
"epoch": 0.0005875,
|
| 41220 |
+
"grad_norm": 1.3885085582733154,
|
| 41221 |
+
"learning_rate": 5.8740000000000003e-05,
|
| 41222 |
+
"loss": 0.8052,
|
| 41223 |
+
"step": 5875
|
| 41224 |
+
},
|
| 41225 |
+
{
|
| 41226 |
+
"epoch": 0.0005876,
|
| 41227 |
+
"grad_norm": 1.5084010362625122,
|
| 41228 |
+
"learning_rate": 5.8750000000000005e-05,
|
| 41229 |
+
"loss": 0.8022,
|
| 41230 |
+
"step": 5876
|
| 41231 |
+
},
|
| 41232 |
+
{
|
| 41233 |
+
"epoch": 0.0005877,
|
| 41234 |
+
"grad_norm": 1.5180604457855225,
|
| 41235 |
+
"learning_rate": 5.876e-05,
|
| 41236 |
+
"loss": 0.7681,
|
| 41237 |
+
"step": 5877
|
| 41238 |
+
},
|
| 41239 |
+
{
|
| 41240 |
+
"epoch": 0.0005878,
|
| 41241 |
+
"grad_norm": 1.5178165435791016,
|
| 41242 |
+
"learning_rate": 5.877e-05,
|
| 41243 |
+
"loss": 0.7559,
|
| 41244 |
+
"step": 5878
|
| 41245 |
+
},
|
| 41246 |
+
{
|
| 41247 |
+
"epoch": 0.0005879,
|
| 41248 |
+
"grad_norm": 1.4072206020355225,
|
| 41249 |
+
"learning_rate": 5.878e-05,
|
| 41250 |
+
"loss": 0.7432,
|
| 41251 |
+
"step": 5879
|
| 41252 |
+
},
|
| 41253 |
+
{
|
| 41254 |
+
"epoch": 0.000588,
|
| 41255 |
+
"grad_norm": 1.6438937187194824,
|
| 41256 |
+
"learning_rate": 5.879e-05,
|
| 41257 |
+
"loss": 0.8389,
|
| 41258 |
+
"step": 5880
|
| 41259 |
+
},
|
| 41260 |
+
{
|
| 41261 |
+
"epoch": 0.0005881,
|
| 41262 |
+
"grad_norm": 1.6769683361053467,
|
| 41263 |
+
"learning_rate": 5.88e-05,
|
| 41264 |
+
"loss": 0.8018,
|
| 41265 |
+
"step": 5881
|
| 41266 |
+
},
|
| 41267 |
+
{
|
| 41268 |
+
"epoch": 0.0005882,
|
| 41269 |
+
"grad_norm": 1.623231291770935,
|
| 41270 |
+
"learning_rate": 5.881e-05,
|
| 41271 |
+
"loss": 0.7739,
|
| 41272 |
+
"step": 5882
|
| 41273 |
+
},
|
| 41274 |
+
{
|
| 41275 |
+
"epoch": 0.0005883,
|
| 41276 |
+
"grad_norm": 1.6477833986282349,
|
| 41277 |
+
"learning_rate": 5.882e-05,
|
| 41278 |
+
"loss": 0.7993,
|
| 41279 |
+
"step": 5883
|
| 41280 |
+
},
|
| 41281 |
+
{
|
| 41282 |
+
"epoch": 0.0005884,
|
| 41283 |
+
"grad_norm": 1.6617273092269897,
|
| 41284 |
+
"learning_rate": 5.883e-05,
|
| 41285 |
+
"loss": 0.8618,
|
| 41286 |
+
"step": 5884
|
| 41287 |
+
},
|
| 41288 |
+
{
|
| 41289 |
+
"epoch": 0.0005885,
|
| 41290 |
+
"grad_norm": 2.173892021179199,
|
| 41291 |
+
"learning_rate": 5.884e-05,
|
| 41292 |
+
"loss": 1.0137,
|
| 41293 |
+
"step": 5885
|
| 41294 |
+
},
|
| 41295 |
+
{
|
| 41296 |
+
"epoch": 0.0005886,
|
| 41297 |
+
"grad_norm": 1.5440952777862549,
|
| 41298 |
+
"learning_rate": 5.885000000000001e-05,
|
| 41299 |
+
"loss": 0.7686,
|
| 41300 |
+
"step": 5886
|
| 41301 |
+
},
|
| 41302 |
+
{
|
| 41303 |
+
"epoch": 0.0005887,
|
| 41304 |
+
"grad_norm": 1.2644119262695312,
|
| 41305 |
+
"learning_rate": 5.8859999999999995e-05,
|
| 41306 |
+
"loss": 0.7139,
|
| 41307 |
+
"step": 5887
|
| 41308 |
+
},
|
| 41309 |
+
{
|
| 41310 |
+
"epoch": 0.0005888,
|
| 41311 |
+
"grad_norm": 1.484767198562622,
|
| 41312 |
+
"learning_rate": 5.887e-05,
|
| 41313 |
+
"loss": 0.7417,
|
| 41314 |
+
"step": 5888
|
| 41315 |
+
},
|
| 41316 |
+
{
|
| 41317 |
+
"epoch": 0.0005889,
|
| 41318 |
+
"grad_norm": 1.5068488121032715,
|
| 41319 |
+
"learning_rate": 5.8880000000000005e-05,
|
| 41320 |
+
"loss": 0.7974,
|
| 41321 |
+
"step": 5889
|
| 41322 |
+
},
|
| 41323 |
+
{
|
| 41324 |
+
"epoch": 0.000589,
|
| 41325 |
+
"grad_norm": 3.0165534019470215,
|
| 41326 |
+
"learning_rate": 5.889000000000001e-05,
|
| 41327 |
+
"loss": 0.812,
|
| 41328 |
+
"step": 5890
|
| 41329 |
+
},
|
| 41330 |
+
{
|
| 41331 |
+
"epoch": 0.0005891,
|
| 41332 |
+
"grad_norm": 1.8161137104034424,
|
| 41333 |
+
"learning_rate": 5.89e-05,
|
| 41334 |
+
"loss": 0.8296,
|
| 41335 |
+
"step": 5891
|
| 41336 |
+
},
|
| 41337 |
+
{
|
| 41338 |
+
"epoch": 0.0005892,
|
| 41339 |
+
"grad_norm": 1.7120927572250366,
|
| 41340 |
+
"learning_rate": 5.8910000000000003e-05,
|
| 41341 |
+
"loss": 0.7227,
|
| 41342 |
+
"step": 5892
|
| 41343 |
+
},
|
| 41344 |
+
{
|
| 41345 |
+
"epoch": 0.0005893,
|
| 41346 |
+
"grad_norm": 1.631578803062439,
|
| 41347 |
+
"learning_rate": 5.8920000000000005e-05,
|
| 41348 |
+
"loss": 0.7925,
|
| 41349 |
+
"step": 5893
|
| 41350 |
+
},
|
| 41351 |
+
{
|
| 41352 |
+
"epoch": 0.0005894,
|
| 41353 |
+
"grad_norm": 1.703325629234314,
|
| 41354 |
+
"learning_rate": 5.893e-05,
|
| 41355 |
+
"loss": 0.8032,
|
| 41356 |
+
"step": 5894
|
| 41357 |
+
},
|
| 41358 |
+
{
|
| 41359 |
+
"epoch": 0.0005895,
|
| 41360 |
+
"grad_norm": 1.7086197137832642,
|
| 41361 |
+
"learning_rate": 5.894e-05,
|
| 41362 |
+
"loss": 0.8003,
|
| 41363 |
+
"step": 5895
|
| 41364 |
+
},
|
| 41365 |
+
{
|
| 41366 |
+
"epoch": 0.0005896,
|
| 41367 |
+
"grad_norm": 5.005209922790527,
|
| 41368 |
+
"learning_rate": 5.895e-05,
|
| 41369 |
+
"loss": 1.2173,
|
| 41370 |
+
"step": 5896
|
| 41371 |
+
},
|
| 41372 |
+
{
|
| 41373 |
+
"epoch": 0.0005897,
|
| 41374 |
+
"grad_norm": 1.6591870784759521,
|
| 41375 |
+
"learning_rate": 5.896e-05,
|
| 41376 |
+
"loss": 0.7886,
|
| 41377 |
+
"step": 5897
|
| 41378 |
+
},
|
| 41379 |
+
{
|
| 41380 |
+
"epoch": 0.0005898,
|
| 41381 |
+
"grad_norm": 1.8780789375305176,
|
| 41382 |
+
"learning_rate": 5.897e-05,
|
| 41383 |
+
"loss": 0.832,
|
| 41384 |
+
"step": 5898
|
| 41385 |
+
},
|
| 41386 |
+
{
|
| 41387 |
+
"epoch": 0.0005899,
|
| 41388 |
+
"grad_norm": 1.866897702217102,
|
| 41389 |
+
"learning_rate": 5.898e-05,
|
| 41390 |
+
"loss": 0.8823,
|
| 41391 |
+
"step": 5899
|
| 41392 |
+
},
|
| 41393 |
+
{
|
| 41394 |
+
"epoch": 0.00059,
|
| 41395 |
+
"grad_norm": 1.6844897270202637,
|
| 41396 |
+
"learning_rate": 5.899e-05,
|
| 41397 |
+
"loss": 0.856,
|
| 41398 |
+
"step": 5900
|
| 41399 |
+
},
|
| 41400 |
+
{
|
| 41401 |
+
"epoch": 0.0005901,
|
| 41402 |
+
"grad_norm": 1.6563727855682373,
|
| 41403 |
+
"learning_rate": 5.9e-05,
|
| 41404 |
+
"loss": 0.8486,
|
| 41405 |
+
"step": 5901
|
| 41406 |
+
},
|
| 41407 |
+
{
|
| 41408 |
+
"epoch": 0.0005902,
|
| 41409 |
+
"grad_norm": 1.4859638214111328,
|
| 41410 |
+
"learning_rate": 5.901e-05,
|
| 41411 |
+
"loss": 0.7632,
|
| 41412 |
+
"step": 5902
|
| 41413 |
+
},
|
| 41414 |
+
{
|
| 41415 |
+
"epoch": 0.0005903,
|
| 41416 |
+
"grad_norm": 1.5815120935440063,
|
| 41417 |
+
"learning_rate": 5.902000000000001e-05,
|
| 41418 |
+
"loss": 0.7725,
|
| 41419 |
+
"step": 5903
|
| 41420 |
+
},
|
| 41421 |
+
{
|
| 41422 |
+
"epoch": 0.0005904,
|
| 41423 |
+
"grad_norm": 2.134547710418701,
|
| 41424 |
+
"learning_rate": 5.9029999999999996e-05,
|
| 41425 |
+
"loss": 0.9082,
|
| 41426 |
+
"step": 5904
|
| 41427 |
+
},
|
| 41428 |
+
{
|
| 41429 |
+
"epoch": 0.0005905,
|
| 41430 |
+
"grad_norm": 1.654038906097412,
|
| 41431 |
+
"learning_rate": 5.904e-05,
|
| 41432 |
+
"loss": 0.7949,
|
| 41433 |
+
"step": 5905
|
| 41434 |
+
},
|
| 41435 |
+
{
|
| 41436 |
+
"epoch": 0.0005906,
|
| 41437 |
+
"grad_norm": 1.5547631978988647,
|
| 41438 |
+
"learning_rate": 5.9050000000000006e-05,
|
| 41439 |
+
"loss": 0.752,
|
| 41440 |
+
"step": 5906
|
| 41441 |
+
},
|
| 41442 |
+
{
|
| 41443 |
+
"epoch": 0.0005907,
|
| 41444 |
+
"grad_norm": 1.5662357807159424,
|
| 41445 |
+
"learning_rate": 5.906000000000001e-05,
|
| 41446 |
+
"loss": 0.8008,
|
| 41447 |
+
"step": 5907
|
| 41448 |
+
},
|
| 41449 |
+
{
|
| 41450 |
+
"epoch": 0.0005908,
|
| 41451 |
+
"grad_norm": 1.57384192943573,
|
| 41452 |
+
"learning_rate": 5.907e-05,
|
| 41453 |
+
"loss": 0.7471,
|
| 41454 |
+
"step": 5908
|
| 41455 |
+
},
|
| 41456 |
+
{
|
| 41457 |
+
"epoch": 0.0005909,
|
| 41458 |
+
"grad_norm": 1.4176363945007324,
|
| 41459 |
+
"learning_rate": 5.9080000000000004e-05,
|
| 41460 |
+
"loss": 0.7207,
|
| 41461 |
+
"step": 5909
|
| 41462 |
+
},
|
| 41463 |
+
{
|
| 41464 |
+
"epoch": 0.000591,
|
| 41465 |
+
"grad_norm": 2.020887851715088,
|
| 41466 |
+
"learning_rate": 5.9090000000000005e-05,
|
| 41467 |
+
"loss": 1.1572,
|
| 41468 |
+
"step": 5910
|
| 41469 |
+
},
|
| 41470 |
+
{
|
| 41471 |
+
"epoch": 0.0005911,
|
| 41472 |
+
"grad_norm": 1.6313410997390747,
|
| 41473 |
+
"learning_rate": 5.91e-05,
|
| 41474 |
+
"loss": 0.7993,
|
| 41475 |
+
"step": 5911
|
| 41476 |
+
},
|
| 41477 |
+
{
|
| 41478 |
+
"epoch": 0.0005912,
|
| 41479 |
+
"grad_norm": 1.5192641019821167,
|
| 41480 |
+
"learning_rate": 5.911e-05,
|
| 41481 |
+
"loss": 0.7412,
|
| 41482 |
+
"step": 5912
|
| 41483 |
+
},
|
| 41484 |
+
{
|
| 41485 |
+
"epoch": 0.0005913,
|
| 41486 |
+
"grad_norm": 1.6517242193222046,
|
| 41487 |
+
"learning_rate": 5.912e-05,
|
| 41488 |
+
"loss": 0.793,
|
| 41489 |
+
"step": 5913
|
| 41490 |
+
},
|
| 41491 |
+
{
|
| 41492 |
+
"epoch": 0.0005914,
|
| 41493 |
+
"grad_norm": 1.642682671546936,
|
| 41494 |
+
"learning_rate": 5.913e-05,
|
| 41495 |
+
"loss": 0.7759,
|
| 41496 |
+
"step": 5914
|
| 41497 |
+
},
|
| 41498 |
+
{
|
| 41499 |
+
"epoch": 0.0005915,
|
| 41500 |
+
"grad_norm": 1.656015157699585,
|
| 41501 |
+
"learning_rate": 5.914e-05,
|
| 41502 |
+
"loss": 0.7896,
|
| 41503 |
+
"step": 5915
|
| 41504 |
+
},
|
| 41505 |
+
{
|
| 41506 |
+
"epoch": 0.0005916,
|
| 41507 |
+
"grad_norm": 1.924726963043213,
|
| 41508 |
+
"learning_rate": 5.915e-05,
|
| 41509 |
+
"loss": 0.8882,
|
| 41510 |
+
"step": 5916
|
| 41511 |
+
},
|
| 41512 |
+
{
|
| 41513 |
+
"epoch": 0.0005917,
|
| 41514 |
+
"grad_norm": 1.5693986415863037,
|
| 41515 |
+
"learning_rate": 5.916e-05,
|
| 41516 |
+
"loss": 0.7817,
|
| 41517 |
+
"step": 5917
|
| 41518 |
+
},
|
| 41519 |
+
{
|
| 41520 |
+
"epoch": 0.0005918,
|
| 41521 |
+
"grad_norm": 1.9120745658874512,
|
| 41522 |
+
"learning_rate": 5.917e-05,
|
| 41523 |
+
"loss": 0.8506,
|
| 41524 |
+
"step": 5918
|
| 41525 |
+
},
|
| 41526 |
+
{
|
| 41527 |
+
"epoch": 0.0005919,
|
| 41528 |
+
"grad_norm": 1.9217987060546875,
|
| 41529 |
+
"learning_rate": 5.918e-05,
|
| 41530 |
+
"loss": 0.8892,
|
| 41531 |
+
"step": 5919
|
| 41532 |
+
},
|
| 41533 |
+
{
|
| 41534 |
+
"epoch": 0.000592,
|
| 41535 |
+
"grad_norm": 2.201404094696045,
|
| 41536 |
+
"learning_rate": 5.919000000000001e-05,
|
| 41537 |
+
"loss": 0.9932,
|
| 41538 |
+
"step": 5920
|
| 41539 |
+
},
|
| 41540 |
+
{
|
| 41541 |
+
"epoch": 0.0005921,
|
| 41542 |
+
"grad_norm": 1.560445785522461,
|
| 41543 |
+
"learning_rate": 5.9199999999999996e-05,
|
| 41544 |
+
"loss": 0.7314,
|
| 41545 |
+
"step": 5921
|
| 41546 |
+
},
|
| 41547 |
+
{
|
| 41548 |
+
"epoch": 0.0005922,
|
| 41549 |
+
"grad_norm": 1.4320646524429321,
|
| 41550 |
+
"learning_rate": 5.921e-05,
|
| 41551 |
+
"loss": 0.7236,
|
| 41552 |
+
"step": 5922
|
| 41553 |
+
},
|
| 41554 |
+
{
|
| 41555 |
+
"epoch": 0.0005923,
|
| 41556 |
+
"grad_norm": 1.3355786800384521,
|
| 41557 |
+
"learning_rate": 5.9220000000000006e-05,
|
| 41558 |
+
"loss": 0.6875,
|
| 41559 |
+
"step": 5923
|
| 41560 |
+
},
|
| 41561 |
+
{
|
| 41562 |
+
"epoch": 0.0005924,
|
| 41563 |
+
"grad_norm": 1.5497984886169434,
|
| 41564 |
+
"learning_rate": 5.923000000000001e-05,
|
| 41565 |
+
"loss": 0.7876,
|
| 41566 |
+
"step": 5924
|
| 41567 |
+
},
|
| 41568 |
+
{
|
| 41569 |
+
"epoch": 0.0005925,
|
| 41570 |
+
"grad_norm": 1.6212843656539917,
|
| 41571 |
+
"learning_rate": 5.924e-05,
|
| 41572 |
+
"loss": 0.7842,
|
| 41573 |
+
"step": 5925
|
| 41574 |
+
},
|
| 41575 |
+
{
|
| 41576 |
+
"epoch": 0.0005926,
|
| 41577 |
+
"grad_norm": 1.6754783391952515,
|
| 41578 |
+
"learning_rate": 5.9250000000000004e-05,
|
| 41579 |
+
"loss": 0.811,
|
| 41580 |
+
"step": 5926
|
| 41581 |
+
},
|
| 41582 |
+
{
|
| 41583 |
+
"epoch": 0.0005927,
|
| 41584 |
+
"grad_norm": 1.5971804857254028,
|
| 41585 |
+
"learning_rate": 5.9260000000000005e-05,
|
| 41586 |
+
"loss": 0.7827,
|
| 41587 |
+
"step": 5927
|
| 41588 |
+
},
|
| 41589 |
+
{
|
| 41590 |
+
"epoch": 0.0005928,
|
| 41591 |
+
"grad_norm": 1.6103317737579346,
|
| 41592 |
+
"learning_rate": 5.927e-05,
|
| 41593 |
+
"loss": 0.8335,
|
| 41594 |
+
"step": 5928
|
| 41595 |
+
},
|
| 41596 |
+
{
|
| 41597 |
+
"epoch": 0.0005929,
|
| 41598 |
+
"grad_norm": 1.4968230724334717,
|
| 41599 |
+
"learning_rate": 5.928e-05,
|
| 41600 |
+
"loss": 0.7837,
|
| 41601 |
+
"step": 5929
|
| 41602 |
+
},
|
| 41603 |
+
{
|
| 41604 |
+
"epoch": 0.000593,
|
| 41605 |
+
"grad_norm": 2.0906319618225098,
|
| 41606 |
+
"learning_rate": 5.929e-05,
|
| 41607 |
+
"loss": 0.9165,
|
| 41608 |
+
"step": 5930
|
| 41609 |
+
},
|
| 41610 |
+
{
|
| 41611 |
+
"epoch": 0.0005931,
|
| 41612 |
+
"grad_norm": 1.530055046081543,
|
| 41613 |
+
"learning_rate": 5.93e-05,
|
| 41614 |
+
"loss": 0.7358,
|
| 41615 |
+
"step": 5931
|
| 41616 |
+
},
|
| 41617 |
+
{
|
| 41618 |
+
"epoch": 0.0005932,
|
| 41619 |
+
"grad_norm": 1.9216514825820923,
|
| 41620 |
+
"learning_rate": 5.931e-05,
|
| 41621 |
+
"loss": 0.9087,
|
| 41622 |
+
"step": 5932
|
| 41623 |
+
},
|
| 41624 |
+
{
|
| 41625 |
+
"epoch": 0.0005933,
|
| 41626 |
+
"grad_norm": 1.5280019044876099,
|
| 41627 |
+
"learning_rate": 5.932e-05,
|
| 41628 |
+
"loss": 0.7646,
|
| 41629 |
+
"step": 5933
|
| 41630 |
+
},
|
| 41631 |
+
{
|
| 41632 |
+
"epoch": 0.0005934,
|
| 41633 |
+
"grad_norm": 1.504082202911377,
|
| 41634 |
+
"learning_rate": 5.933e-05,
|
| 41635 |
+
"loss": 0.7935,
|
| 41636 |
+
"step": 5934
|
| 41637 |
+
},
|
| 41638 |
+
{
|
| 41639 |
+
"epoch": 0.0005935,
|
| 41640 |
+
"grad_norm": 1.5964938402175903,
|
| 41641 |
+
"learning_rate": 5.934e-05,
|
| 41642 |
+
"loss": 0.8315,
|
| 41643 |
+
"step": 5935
|
| 41644 |
+
},
|
| 41645 |
+
{
|
| 41646 |
+
"epoch": 0.0005936,
|
| 41647 |
+
"grad_norm": 1.5614784955978394,
|
| 41648 |
+
"learning_rate": 5.935e-05,
|
| 41649 |
+
"loss": 0.8149,
|
| 41650 |
+
"step": 5936
|
| 41651 |
+
},
|
| 41652 |
+
{
|
| 41653 |
+
"epoch": 0.0005937,
|
| 41654 |
+
"grad_norm": 1.6300089359283447,
|
| 41655 |
+
"learning_rate": 5.936000000000001e-05,
|
| 41656 |
+
"loss": 0.7729,
|
| 41657 |
+
"step": 5937
|
| 41658 |
+
},
|
| 41659 |
+
{
|
| 41660 |
+
"epoch": 0.0005938,
|
| 41661 |
+
"grad_norm": 2.1969408988952637,
|
| 41662 |
+
"learning_rate": 5.9369999999999996e-05,
|
| 41663 |
+
"loss": 0.9463,
|
| 41664 |
+
"step": 5938
|
| 41665 |
+
},
|
| 41666 |
+
{
|
| 41667 |
+
"epoch": 0.0005939,
|
| 41668 |
+
"grad_norm": 1.637465476989746,
|
| 41669 |
+
"learning_rate": 5.938e-05,
|
| 41670 |
+
"loss": 0.7358,
|
| 41671 |
+
"step": 5939
|
| 41672 |
+
},
|
| 41673 |
+
{
|
| 41674 |
+
"epoch": 0.000594,
|
| 41675 |
+
"grad_norm": 2.084805488586426,
|
| 41676 |
+
"learning_rate": 5.9390000000000006e-05,
|
| 41677 |
+
"loss": 0.8447,
|
| 41678 |
+
"step": 5940
|
| 41679 |
+
},
|
| 41680 |
+
{
|
| 41681 |
+
"epoch": 0.0005941,
|
| 41682 |
+
"grad_norm": 1.8123067617416382,
|
| 41683 |
+
"learning_rate": 5.940000000000001e-05,
|
| 41684 |
+
"loss": 0.7827,
|
| 41685 |
+
"step": 5941
|
| 41686 |
+
},
|
| 41687 |
+
{
|
| 41688 |
+
"epoch": 0.0005942,
|
| 41689 |
+
"grad_norm": 2.333995819091797,
|
| 41690 |
+
"learning_rate": 5.941e-05,
|
| 41691 |
+
"loss": 0.8882,
|
| 41692 |
+
"step": 5942
|
| 41693 |
+
},
|
| 41694 |
+
{
|
| 41695 |
+
"epoch": 0.0005943,
|
| 41696 |
+
"grad_norm": 1.5387401580810547,
|
| 41697 |
+
"learning_rate": 5.9420000000000004e-05,
|
| 41698 |
+
"loss": 0.7437,
|
| 41699 |
+
"step": 5943
|
| 41700 |
+
},
|
| 41701 |
+
{
|
| 41702 |
+
"epoch": 0.0005944,
|
| 41703 |
+
"grad_norm": 1.5562912225723267,
|
| 41704 |
+
"learning_rate": 5.9430000000000005e-05,
|
| 41705 |
+
"loss": 0.7188,
|
| 41706 |
+
"step": 5944
|
| 41707 |
+
},
|
| 41708 |
+
{
|
| 41709 |
+
"epoch": 0.0005945,
|
| 41710 |
+
"grad_norm": 1.7533470392227173,
|
| 41711 |
+
"learning_rate": 5.944e-05,
|
| 41712 |
+
"loss": 0.8643,
|
| 41713 |
+
"step": 5945
|
| 41714 |
+
},
|
| 41715 |
+
{
|
| 41716 |
+
"epoch": 0.0005946,
|
| 41717 |
+
"grad_norm": 1.5385069847106934,
|
| 41718 |
+
"learning_rate": 5.945e-05,
|
| 41719 |
+
"loss": 0.7598,
|
| 41720 |
+
"step": 5946
|
| 41721 |
+
},
|
| 41722 |
+
{
|
| 41723 |
+
"epoch": 0.0005947,
|
| 41724 |
+
"grad_norm": 1.808990716934204,
|
| 41725 |
+
"learning_rate": 5.946e-05,
|
| 41726 |
+
"loss": 0.8101,
|
| 41727 |
+
"step": 5947
|
| 41728 |
+
},
|
| 41729 |
+
{
|
| 41730 |
+
"epoch": 0.0005948,
|
| 41731 |
+
"grad_norm": 1.5682218074798584,
|
| 41732 |
+
"learning_rate": 5.947e-05,
|
| 41733 |
+
"loss": 0.8096,
|
| 41734 |
+
"step": 5948
|
| 41735 |
+
},
|
| 41736 |
+
{
|
| 41737 |
+
"epoch": 0.0005949,
|
| 41738 |
+
"grad_norm": 1.9593143463134766,
|
| 41739 |
+
"learning_rate": 5.948e-05,
|
| 41740 |
+
"loss": 0.873,
|
| 41741 |
+
"step": 5949
|
| 41742 |
+
},
|
| 41743 |
+
{
|
| 41744 |
+
"epoch": 0.000595,
|
| 41745 |
+
"grad_norm": 2.1629326343536377,
|
| 41746 |
+
"learning_rate": 5.949e-05,
|
| 41747 |
+
"loss": 0.8833,
|
| 41748 |
+
"step": 5950
|
| 41749 |
+
},
|
| 41750 |
+
{
|
| 41751 |
+
"epoch": 0.0005951,
|
| 41752 |
+
"grad_norm": 2.159661293029785,
|
| 41753 |
+
"learning_rate": 5.95e-05,
|
| 41754 |
+
"loss": 0.8745,
|
| 41755 |
+
"step": 5951
|
| 41756 |
+
},
|
| 41757 |
+
{
|
| 41758 |
+
"epoch": 0.0005952,
|
| 41759 |
+
"grad_norm": 1.592955231666565,
|
| 41760 |
+
"learning_rate": 5.951e-05,
|
| 41761 |
+
"loss": 0.7007,
|
| 41762 |
+
"step": 5952
|
| 41763 |
+
},
|
| 41764 |
+
{
|
| 41765 |
+
"epoch": 0.0005953,
|
| 41766 |
+
"grad_norm": 1.721563696861267,
|
| 41767 |
+
"learning_rate": 5.952e-05,
|
| 41768 |
+
"loss": 0.793,
|
| 41769 |
+
"step": 5953
|
| 41770 |
+
},
|
| 41771 |
+
{
|
| 41772 |
+
"epoch": 0.0005954,
|
| 41773 |
+
"grad_norm": 1.742895245552063,
|
| 41774 |
+
"learning_rate": 5.953e-05,
|
| 41775 |
+
"loss": 0.7817,
|
| 41776 |
+
"step": 5954
|
| 41777 |
+
},
|
| 41778 |
+
{
|
| 41779 |
+
"epoch": 0.0005955,
|
| 41780 |
+
"grad_norm": 1.558279037475586,
|
| 41781 |
+
"learning_rate": 5.9539999999999996e-05,
|
| 41782 |
+
"loss": 0.8027,
|
| 41783 |
+
"step": 5955
|
| 41784 |
+
},
|
| 41785 |
+
{
|
| 41786 |
+
"epoch": 0.0005956,
|
| 41787 |
+
"grad_norm": 1.4903243780136108,
|
| 41788 |
+
"learning_rate": 5.955e-05,
|
| 41789 |
+
"loss": 0.7188,
|
| 41790 |
+
"step": 5956
|
| 41791 |
+
},
|
| 41792 |
+
{
|
| 41793 |
+
"epoch": 0.0005957,
|
| 41794 |
+
"grad_norm": 1.782842755317688,
|
| 41795 |
+
"learning_rate": 5.9560000000000006e-05,
|
| 41796 |
+
"loss": 0.7915,
|
| 41797 |
+
"step": 5957
|
| 41798 |
+
},
|
| 41799 |
+
{
|
| 41800 |
+
"epoch": 0.0005958,
|
| 41801 |
+
"grad_norm": 1.6400823593139648,
|
| 41802 |
+
"learning_rate": 5.957000000000001e-05,
|
| 41803 |
+
"loss": 0.7417,
|
| 41804 |
+
"step": 5958
|
| 41805 |
+
},
|
| 41806 |
+
{
|
| 41807 |
+
"epoch": 0.0005959,
|
| 41808 |
+
"grad_norm": 1.4021106958389282,
|
| 41809 |
+
"learning_rate": 5.958e-05,
|
| 41810 |
+
"loss": 0.6851,
|
| 41811 |
+
"step": 5959
|
| 41812 |
+
},
|
| 41813 |
+
{
|
| 41814 |
+
"epoch": 0.000596,
|
| 41815 |
+
"grad_norm": 1.7664570808410645,
|
| 41816 |
+
"learning_rate": 5.9590000000000004e-05,
|
| 41817 |
+
"loss": 0.8364,
|
| 41818 |
+
"step": 5960
|
| 41819 |
+
},
|
| 41820 |
+
{
|
| 41821 |
+
"epoch": 0.0005961,
|
| 41822 |
+
"grad_norm": 1.5353564023971558,
|
| 41823 |
+
"learning_rate": 5.9600000000000005e-05,
|
| 41824 |
+
"loss": 0.7529,
|
| 41825 |
+
"step": 5961
|
| 41826 |
+
},
|
| 41827 |
+
{
|
| 41828 |
+
"epoch": 0.0005962,
|
| 41829 |
+
"grad_norm": 1.3988913297653198,
|
| 41830 |
+
"learning_rate": 5.961e-05,
|
| 41831 |
+
"loss": 0.6948,
|
| 41832 |
+
"step": 5962
|
| 41833 |
+
},
|
| 41834 |
+
{
|
| 41835 |
+
"epoch": 0.0005963,
|
| 41836 |
+
"grad_norm": 3.4333651065826416,
|
| 41837 |
+
"learning_rate": 5.962e-05,
|
| 41838 |
+
"loss": 0.8628,
|
| 41839 |
+
"step": 5963
|
| 41840 |
+
},
|
| 41841 |
+
{
|
| 41842 |
+
"epoch": 0.0005964,
|
| 41843 |
+
"grad_norm": 1.5233893394470215,
|
| 41844 |
+
"learning_rate": 5.963e-05,
|
| 41845 |
+
"loss": 0.7407,
|
| 41846 |
+
"step": 5964
|
| 41847 |
+
},
|
| 41848 |
+
{
|
| 41849 |
+
"epoch": 0.0005965,
|
| 41850 |
+
"grad_norm": 1.7804893255233765,
|
| 41851 |
+
"learning_rate": 5.964e-05,
|
| 41852 |
+
"loss": 0.771,
|
| 41853 |
+
"step": 5965
|
| 41854 |
+
},
|
| 41855 |
+
{
|
| 41856 |
+
"epoch": 0.0005966,
|
| 41857 |
+
"grad_norm": 1.4234447479248047,
|
| 41858 |
+
"learning_rate": 5.965e-05,
|
| 41859 |
+
"loss": 0.6919,
|
| 41860 |
+
"step": 5966
|
| 41861 |
+
},
|
| 41862 |
+
{
|
| 41863 |
+
"epoch": 0.0005967,
|
| 41864 |
+
"grad_norm": 1.5506657361984253,
|
| 41865 |
+
"learning_rate": 5.966e-05,
|
| 41866 |
+
"loss": 0.7441,
|
| 41867 |
+
"step": 5967
|
| 41868 |
+
},
|
| 41869 |
+
{
|
| 41870 |
+
"epoch": 0.0005968,
|
| 41871 |
+
"grad_norm": 1.599525809288025,
|
| 41872 |
+
"learning_rate": 5.967e-05,
|
| 41873 |
+
"loss": 0.7598,
|
| 41874 |
+
"step": 5968
|
| 41875 |
+
},
|
| 41876 |
+
{
|
| 41877 |
+
"epoch": 0.0005969,
|
| 41878 |
+
"grad_norm": 2.79545259475708,
|
| 41879 |
+
"learning_rate": 5.968e-05,
|
| 41880 |
+
"loss": 0.8911,
|
| 41881 |
+
"step": 5969
|
| 41882 |
+
},
|
| 41883 |
+
{
|
| 41884 |
+
"epoch": 0.000597,
|
| 41885 |
+
"grad_norm": 1.5482027530670166,
|
| 41886 |
+
"learning_rate": 5.969e-05,
|
| 41887 |
+
"loss": 0.7261,
|
| 41888 |
+
"step": 5970
|
| 41889 |
+
},
|
| 41890 |
+
{
|
| 41891 |
+
"epoch": 0.0005971,
|
| 41892 |
+
"grad_norm": 1.4654983282089233,
|
| 41893 |
+
"learning_rate": 5.97e-05,
|
| 41894 |
+
"loss": 0.6968,
|
| 41895 |
+
"step": 5971
|
| 41896 |
+
},
|
| 41897 |
+
{
|
| 41898 |
+
"epoch": 0.0005972,
|
| 41899 |
+
"grad_norm": 1.8669058084487915,
|
| 41900 |
+
"learning_rate": 5.9709999999999996e-05,
|
| 41901 |
+
"loss": 0.7749,
|
| 41902 |
+
"step": 5972
|
| 41903 |
+
},
|
| 41904 |
+
{
|
| 41905 |
+
"epoch": 0.0005973,
|
| 41906 |
+
"grad_norm": 1.5295121669769287,
|
| 41907 |
+
"learning_rate": 5.972e-05,
|
| 41908 |
+
"loss": 0.7104,
|
| 41909 |
+
"step": 5973
|
| 41910 |
+
},
|
| 41911 |
+
{
|
| 41912 |
+
"epoch": 0.0005974,
|
| 41913 |
+
"grad_norm": 1.6404061317443848,
|
| 41914 |
+
"learning_rate": 5.9730000000000006e-05,
|
| 41915 |
+
"loss": 0.7319,
|
| 41916 |
+
"step": 5974
|
| 41917 |
+
},
|
| 41918 |
+
{
|
| 41919 |
+
"epoch": 0.0005975,
|
| 41920 |
+
"grad_norm": 1.3939292430877686,
|
| 41921 |
+
"learning_rate": 5.974000000000001e-05,
|
| 41922 |
+
"loss": 0.6626,
|
| 41923 |
+
"step": 5975
|
| 41924 |
+
},
|
| 41925 |
+
{
|
| 41926 |
+
"epoch": 0.0005976,
|
| 41927 |
+
"grad_norm": 1.4034295082092285,
|
| 41928 |
+
"learning_rate": 5.9749999999999995e-05,
|
| 41929 |
+
"loss": 0.6572,
|
| 41930 |
+
"step": 5976
|
| 41931 |
+
},
|
| 41932 |
+
{
|
| 41933 |
+
"epoch": 0.0005977,
|
| 41934 |
+
"grad_norm": 1.4362789392471313,
|
| 41935 |
+
"learning_rate": 5.9760000000000004e-05,
|
| 41936 |
+
"loss": 0.6987,
|
| 41937 |
+
"step": 5977
|
| 41938 |
+
},
|
| 41939 |
+
{
|
| 41940 |
+
"epoch": 0.0005978,
|
| 41941 |
+
"grad_norm": 1.4524978399276733,
|
| 41942 |
+
"learning_rate": 5.9770000000000005e-05,
|
| 41943 |
+
"loss": 0.71,
|
| 41944 |
+
"step": 5978
|
| 41945 |
+
},
|
| 41946 |
+
{
|
| 41947 |
+
"epoch": 0.0005979,
|
| 41948 |
+
"grad_norm": 2.0200347900390625,
|
| 41949 |
+
"learning_rate": 5.978e-05,
|
| 41950 |
+
"loss": 0.855,
|
| 41951 |
+
"step": 5979
|
| 41952 |
+
},
|
| 41953 |
+
{
|
| 41954 |
+
"epoch": 0.000598,
|
| 41955 |
+
"grad_norm": 1.6224732398986816,
|
| 41956 |
+
"learning_rate": 5.979e-05,
|
| 41957 |
+
"loss": 0.7871,
|
| 41958 |
+
"step": 5980
|
| 41959 |
+
},
|
| 41960 |
+
{
|
| 41961 |
+
"epoch": 0.0005981,
|
| 41962 |
+
"grad_norm": 1.3911391496658325,
|
| 41963 |
+
"learning_rate": 5.9800000000000003e-05,
|
| 41964 |
+
"loss": 0.6836,
|
| 41965 |
+
"step": 5981
|
| 41966 |
+
},
|
| 41967 |
+
{
|
| 41968 |
+
"epoch": 0.0005982,
|
| 41969 |
+
"grad_norm": 1.5443239212036133,
|
| 41970 |
+
"learning_rate": 5.981e-05,
|
| 41971 |
+
"loss": 0.7676,
|
| 41972 |
+
"step": 5982
|
| 41973 |
+
},
|
| 41974 |
+
{
|
| 41975 |
+
"epoch": 0.0005983,
|
| 41976 |
+
"grad_norm": 1.5114848613739014,
|
| 41977 |
+
"learning_rate": 5.982e-05,
|
| 41978 |
+
"loss": 0.73,
|
| 41979 |
+
"step": 5983
|
| 41980 |
+
},
|
| 41981 |
+
{
|
| 41982 |
+
"epoch": 0.0005984,
|
| 41983 |
+
"grad_norm": 1.7796193361282349,
|
| 41984 |
+
"learning_rate": 5.983e-05,
|
| 41985 |
+
"loss": 0.9609,
|
| 41986 |
+
"step": 5984
|
| 41987 |
+
},
|
| 41988 |
+
{
|
| 41989 |
+
"epoch": 0.0005985,
|
| 41990 |
+
"grad_norm": 1.8018146753311157,
|
| 41991 |
+
"learning_rate": 5.984e-05,
|
| 41992 |
+
"loss": 0.8877,
|
| 41993 |
+
"step": 5985
|
| 41994 |
+
},
|
| 41995 |
+
{
|
| 41996 |
+
"epoch": 0.0005986,
|
| 41997 |
+
"grad_norm": 1.4452275037765503,
|
| 41998 |
+
"learning_rate": 5.985e-05,
|
| 41999 |
+
"loss": 0.6768,
|
| 42000 |
+
"step": 5986
|
| 42001 |
+
},
|
| 42002 |
+
{
|
| 42003 |
+
"epoch": 0.0005987,
|
| 42004 |
+
"grad_norm": 1.846722960472107,
|
| 42005 |
+
"learning_rate": 5.986e-05,
|
| 42006 |
+
"loss": 0.8223,
|
| 42007 |
+
"step": 5987
|
| 42008 |
+
},
|
| 42009 |
+
{
|
| 42010 |
+
"epoch": 0.0005988,
|
| 42011 |
+
"grad_norm": 2.8641154766082764,
|
| 42012 |
+
"learning_rate": 5.987e-05,
|
| 42013 |
+
"loss": 0.9253,
|
| 42014 |
+
"step": 5988
|
| 42015 |
+
},
|
| 42016 |
+
{
|
| 42017 |
+
"epoch": 0.0005989,
|
| 42018 |
+
"grad_norm": 1.8777289390563965,
|
| 42019 |
+
"learning_rate": 5.9879999999999996e-05,
|
| 42020 |
+
"loss": 0.7803,
|
| 42021 |
+
"step": 5989
|
| 42022 |
+
},
|
| 42023 |
+
{
|
| 42024 |
+
"epoch": 0.000599,
|
| 42025 |
+
"grad_norm": 1.9890245199203491,
|
| 42026 |
+
"learning_rate": 5.989e-05,
|
| 42027 |
+
"loss": 0.8408,
|
| 42028 |
+
"step": 5990
|
| 42029 |
+
},
|
| 42030 |
+
{
|
| 42031 |
+
"epoch": 0.0005991,
|
| 42032 |
+
"grad_norm": 1.6677210330963135,
|
| 42033 |
+
"learning_rate": 5.9900000000000006e-05,
|
| 42034 |
+
"loss": 0.7793,
|
| 42035 |
+
"step": 5991
|
| 42036 |
+
},
|
| 42037 |
+
{
|
| 42038 |
+
"epoch": 0.0005992,
|
| 42039 |
+
"grad_norm": 1.460680365562439,
|
| 42040 |
+
"learning_rate": 5.991000000000001e-05,
|
| 42041 |
+
"loss": 0.7139,
|
| 42042 |
+
"step": 5992
|
| 42043 |
+
},
|
| 42044 |
+
{
|
| 42045 |
+
"epoch": 0.0005993,
|
| 42046 |
+
"grad_norm": 1.5622482299804688,
|
| 42047 |
+
"learning_rate": 5.9919999999999996e-05,
|
| 42048 |
+
"loss": 0.73,
|
| 42049 |
+
"step": 5993
|
| 42050 |
+
},
|
| 42051 |
+
{
|
| 42052 |
+
"epoch": 0.0005994,
|
| 42053 |
+
"grad_norm": 1.4825682640075684,
|
| 42054 |
+
"learning_rate": 5.9930000000000004e-05,
|
| 42055 |
+
"loss": 0.6875,
|
| 42056 |
+
"step": 5994
|
| 42057 |
+
},
|
| 42058 |
+
{
|
| 42059 |
+
"epoch": 0.0005995,
|
| 42060 |
+
"grad_norm": 1.6099538803100586,
|
| 42061 |
+
"learning_rate": 5.9940000000000005e-05,
|
| 42062 |
+
"loss": 0.792,
|
| 42063 |
+
"step": 5995
|
| 42064 |
+
},
|
| 42065 |
+
{
|
| 42066 |
+
"epoch": 0.0005996,
|
| 42067 |
+
"grad_norm": 1.5532127618789673,
|
| 42068 |
+
"learning_rate": 5.995e-05,
|
| 42069 |
+
"loss": 0.731,
|
| 42070 |
+
"step": 5996
|
| 42071 |
+
},
|
| 42072 |
+
{
|
| 42073 |
+
"epoch": 0.0005997,
|
| 42074 |
+
"grad_norm": 1.4975957870483398,
|
| 42075 |
+
"learning_rate": 5.996e-05,
|
| 42076 |
+
"loss": 0.7085,
|
| 42077 |
+
"step": 5997
|
| 42078 |
+
},
|
| 42079 |
+
{
|
| 42080 |
+
"epoch": 0.0005998,
|
| 42081 |
+
"grad_norm": 1.4132734537124634,
|
| 42082 |
+
"learning_rate": 5.9970000000000004e-05,
|
| 42083 |
+
"loss": 0.6748,
|
| 42084 |
+
"step": 5998
|
| 42085 |
+
},
|
| 42086 |
+
{
|
| 42087 |
+
"epoch": 0.0005999,
|
| 42088 |
+
"grad_norm": 1.314280390739441,
|
| 42089 |
+
"learning_rate": 5.998e-05,
|
| 42090 |
+
"loss": 0.6504,
|
| 42091 |
+
"step": 5999
|
| 42092 |
+
},
|
| 42093 |
+
{
|
| 42094 |
+
"epoch": 0.0006,
|
| 42095 |
+
"grad_norm": 1.5365554094314575,
|
| 42096 |
+
"learning_rate": 5.999e-05,
|
| 42097 |
+
"loss": 0.6851,
|
| 42098 |
+
"step": 6000
|
| 42099 |
+
},
|
| 42100 |
+
{
|
| 42101 |
+
"epoch": 0.0006,
|
| 42102 |
+
"eval_loss": 0.08720719069242477,
|
| 42103 |
+
"eval_runtime": 362.1442,
|
| 42104 |
+
"eval_samples_per_second": 27.613,
|
| 42105 |
+
"eval_steps_per_second": 1.726,
|
| 42106 |
+
"step": 6000
|
| 42107 |
}
|
| 42108 |
],
|
| 42109 |
"logging_steps": 1,
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5841
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae380d9a3464bac94b12b7910f8c9fe7f8da9b6797d02eff5df63fffc97e0f19
|
| 3 |
size 5841
|