Upload 10 files
- model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +3358 -4
- training_args.bin +1 -1
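These six files are the usual contents of a Hugging Face Trainer checkpoint. A minimal sketch for fetching them locally with the `huggingface_hub` client; the repository id below is a placeholder, not taken from this commit:

```python
from huggingface_hub import hf_hub_download

# Placeholder repository id -- substitute the actual repo this commit belongs to.
REPO_ID = "your-username/your-model"

CHECKPOINT_FILES = [
    "model.safetensors",
    "optimizer.pt",
    "rng_state.pth",
    "scheduler.pt",
    "trainer_state.json",
    "training_args.bin",
]

for name in CHECKPOINT_FILES:
    # Downloads (or reuses the cached copy of) each file and returns its local path.
    local_path = hf_hub_download(repo_id=REPO_ID, filename=name)
    print(name, "->", local_path)
```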
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:20566ce2cded3cc02fb93eee498468296e195bde2f327717d82d2153bf039a5c
 size 598635032
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c4bb4ca88b8d2d3ea2dfd7303f13e3a8cd59c49a916572576e2dc64da5c07512
 size 1197359627
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:50c7b18601d8312eab9dd312837f003a894f9f32c0a047b958e34fe83b5149bb
 size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9f734a52ddcea7feef7729d4ad9d1d723abcc8fb15cbcedadde156471860e8d2
 size 1465
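Each of the diffs above touches a Git LFS pointer file, which records only a `version` line, an `oid sha256:` digest, and a `size` in bytes. A minimal sketch, assuming both the pointer text and the downloaded object are available locally (paths are hypothetical), for checking that an object matches its pointer:

```python
import hashlib
import os

def read_lfs_pointer(pointer_path):
    """Parse a Git LFS pointer file into its oid and size fields."""
    fields = {}
    with open(pointer_path, "r", encoding="utf-8") as f:
        for line in f:
            if line.startswith("oid sha256:"):
                fields["oid"] = line.split("oid sha256:", 1)[1].strip()
            elif line.startswith("size "):
                fields["size"] = int(line.split("size ", 1)[1].strip())
    return fields

def verify_lfs_object(pointer_path, object_path):
    """Check that the real file matches the pointer's sha256 digest and size."""
    fields = read_lfs_pointer(pointer_path)
    h = hashlib.sha256()
    with open(object_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return (h.hexdigest() == fields.get("oid")
            and os.path.getsize(object_path) == fields.get("size"))

# Example call with hypothetical paths:
# print(verify_lfs_object("model.safetensors.pointer", "model.safetensors"))
```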
trainer_state.json CHANGED

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
   "eval_steps": 1000,
-  "global_step":
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,

@@ -21075,10 +21075,3364 @@
   "eval_samples_per_second": 195.474,
   "eval_steps_per_second": 1.534,
   "step": 270000
     }
   ],
   "logging_steps": 100,
-  "max_steps":
   "num_input_tokens_seen": 0,
   "num_train_epochs": 9223372036854775807,
   "save_steps": 1000,

@@ -21094,7 +24448,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": null
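The hunks above change `trainer_state.json`, the Trainer's resume state: `global_step`, `epoch`, scheduling fields such as `logging_steps` and `save_steps`, and a `log_history` array holding the per-step loss and evaluation records listed below. A minimal sketch (the checkpoint path is hypothetical) for reading those fields back:

```python
import json

# Hypothetical path to the trainer state uploaded in this commit.
STATE_PATH = "path/to/checkpoint/trainer_state.json"

with open(STATE_PATH, "r", encoding="utf-8") as f:
    state = json.load(f)

print("global_step:", state["global_step"])
print("epoch:", state["epoch"])
print("logging every", state["logging_steps"], "steps, saving every", state["save_steps"])

# log_history mixes training records (loss, learning_rate, grad_norm) with
# evaluation records (eval_loss, eval_runtime, ...); filter on the key you need.
eval_curve = [(r["step"], r["eval_loss"]) for r in state["log_history"] if "eval_loss" in r]
train_curve = [(r["step"], r["loss"]) for r in state["log_history"] if "loss" in r]
print("last eval point:", eval_curve[-1] if eval_curve else None)
print("last train point:", train_curve[-1] if train_curve else None)
```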

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.02,
   "eval_steps": 1000,
+  "global_step": 313000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,

   "eval_samples_per_second": 195.474,
   "eval_steps_per_second": 1.534,
   "step": 270000
| 21078 |
+
},
|
| 21079 |
+
{
|
| 21080 |
+
"epoch": 0.00027908170954291995,
|
| 21081 |
+
"grad_norm": 0.9838644862174988,
|
| 21082 |
+
"learning_rate": 7.406365143716071e-06,
|
| 21083 |
+
"loss": 1.9287,
|
| 21084 |
+
"step": 270100
|
| 21085 |
+
},
|
| 21086 |
+
{
|
| 21087 |
+
"epoch": 0.0005581634190858399,
|
| 21088 |
+
"grad_norm": 1.0142942667007446,
|
| 21089 |
+
"learning_rate": 7.390642289947644e-06,
|
| 21090 |
+
"loss": 1.9146,
|
| 21091 |
+
"step": 270200
|
| 21092 |
+
},
|
| 21093 |
+
{
|
| 21094 |
+
"epoch": 0.0008372451286287599,
|
| 21095 |
+
"grad_norm": 1.0048279762268066,
|
| 21096 |
+
"learning_rate": 7.374933247575938e-06,
|
| 21097 |
+
"loss": 1.921,
|
| 21098 |
+
"step": 270300
|
| 21099 |
+
},
|
| 21100 |
+
{
|
| 21101 |
+
"epoch": 0.0011163268381716798,
|
| 21102 |
+
"grad_norm": 1.061614751815796,
|
| 21103 |
+
"learning_rate": 7.359238028921914e-06,
|
| 21104 |
+
"loss": 1.9098,
|
| 21105 |
+
"step": 270400
|
| 21106 |
+
},
|
| 21107 |
+
{
|
| 21108 |
+
"epoch": 0.0013954085477146,
|
| 21109 |
+
"grad_norm": 1.0184416770935059,
|
| 21110 |
+
"learning_rate": 7.343556646295647e-06,
|
| 21111 |
+
"loss": 1.9307,
|
| 21112 |
+
"step": 270500
|
| 21113 |
+
},
|
| 21114 |
+
{
|
| 21115 |
+
"epoch": 0.0016744902572575198,
|
| 21116 |
+
"grad_norm": 1.0567371845245361,
|
| 21117 |
+
"learning_rate": 7.327889111996397e-06,
|
| 21118 |
+
"loss": 1.9093,
|
| 21119 |
+
"step": 270600
|
| 21120 |
+
},
|
| 21121 |
+
{
|
| 21122 |
+
"epoch": 0.00195357196680044,
|
| 21123 |
+
"grad_norm": 1.0020757913589478,
|
| 21124 |
+
"learning_rate": 7.312235438312537e-06,
|
| 21125 |
+
"loss": 1.9089,
|
| 21126 |
+
"step": 270700
|
| 21127 |
+
},
|
| 21128 |
+
{
|
| 21129 |
+
"epoch": 0.0022326536763433596,
|
| 21130 |
+
"grad_norm": 0.9947327375411987,
|
| 21131 |
+
"learning_rate": 7.296595637521581e-06,
|
| 21132 |
+
"loss": 1.9175,
|
| 21133 |
+
"step": 270800
|
| 21134 |
+
},
|
| 21135 |
+
{
|
| 21136 |
+
"epoch": 0.0025117353858862797,
|
| 21137 |
+
"grad_norm": 0.9927939176559448,
|
| 21138 |
+
"learning_rate": 7.280969721890163e-06,
|
| 21139 |
+
"loss": 1.9116,
|
| 21140 |
+
"step": 270900
|
| 21141 |
+
},
|
| 21142 |
+
{
|
| 21143 |
+
"epoch": 0.0027908170954292,
|
| 21144 |
+
"grad_norm": 0.994209885597229,
|
| 21145 |
+
"learning_rate": 7.26535770367403e-06,
|
| 21146 |
+
"loss": 1.9031,
|
| 21147 |
+
"step": 271000
|
| 21148 |
+
},
|
| 21149 |
+
{
|
| 21150 |
+
"epoch": 0.0027908170954292,
|
| 21151 |
+
"eval_loss": 2.1892189979553223,
|
| 21152 |
+
"eval_runtime": 53.5994,
|
| 21153 |
+
"eval_samples_per_second": 190.189,
|
| 21154 |
+
"eval_steps_per_second": 1.493,
|
| 21155 |
+
"step": 271000
|
| 21156 |
+
},
|
| 21157 |
+
{
|
| 21158 |
+
"epoch": 0.00306989880497212,
|
| 21159 |
+
"grad_norm": 0.9945200085639954,
|
| 21160 |
+
"learning_rate": 7.249759595118011e-06,
|
| 21161 |
+
"loss": 1.9045,
|
| 21162 |
+
"step": 271100
|
| 21163 |
+
},
|
| 21164 |
+
{
|
| 21165 |
+
"epoch": 0.0033489805145150396,
|
| 21166 |
+
"grad_norm": 0.9387146234512329,
|
| 21167 |
+
"learning_rate": 7.234175408456037e-06,
|
| 21168 |
+
"loss": 1.9048,
|
| 21169 |
+
"step": 271200
|
| 21170 |
+
},
|
| 21171 |
+
{
|
| 21172 |
+
"epoch": 0.0036280622240579597,
|
| 21173 |
+
"grad_norm": 0.9996144771575928,
|
| 21174 |
+
"learning_rate": 7.218605155911126e-06,
|
| 21175 |
+
"loss": 1.9089,
|
| 21176 |
+
"step": 271300
|
| 21177 |
+
},
|
| 21178 |
+
{
|
| 21179 |
+
"epoch": 0.00390714393360088,
|
| 21180 |
+
"grad_norm": 0.9891520142555237,
|
| 21181 |
+
"learning_rate": 7.203048849695357e-06,
|
| 21182 |
+
"loss": 1.9093,
|
| 21183 |
+
"step": 271400
|
| 21184 |
+
},
|
| 21185 |
+
{
|
| 21186 |
+
"epoch": 0.0041862256431437995,
|
| 21187 |
+
"grad_norm": 1.0603066682815552,
|
| 21188 |
+
"learning_rate": 7.187506502009886e-06,
|
| 21189 |
+
"loss": 1.8988,
|
| 21190 |
+
"step": 271500
|
| 21191 |
+
},
|
| 21192 |
+
{
|
| 21193 |
+
"epoch": 0.004465307352686719,
|
| 21194 |
+
"grad_norm": 1.0593341588974,
|
| 21195 |
+
"learning_rate": 7.17197812504489e-06,
|
| 21196 |
+
"loss": 1.9138,
|
| 21197 |
+
"step": 271600
|
| 21198 |
+
},
|
| 21199 |
+
{
|
| 21200 |
+
"epoch": 0.00474438906222964,
|
| 21201 |
+
"grad_norm": 1.0183734893798828,
|
| 21202 |
+
"learning_rate": 7.156463730979626e-06,
|
| 21203 |
+
"loss": 1.9011,
|
| 21204 |
+
"step": 271700
|
| 21205 |
+
},
|
| 21206 |
+
{
|
| 21207 |
+
"epoch": 0.005023470771772559,
|
| 21208 |
+
"grad_norm": 0.9992024302482605,
|
| 21209 |
+
"learning_rate": 7.140963331982351e-06,
|
| 21210 |
+
"loss": 1.9059,
|
| 21211 |
+
"step": 271800
|
| 21212 |
+
},
|
| 21213 |
+
{
|
| 21214 |
+
"epoch": 0.00530255248131548,
|
| 21215 |
+
"grad_norm": 0.9801898002624512,
|
| 21216 |
+
"learning_rate": 7.125476940210371e-06,
|
| 21217 |
+
"loss": 1.905,
|
| 21218 |
+
"step": 271900
|
| 21219 |
+
},
|
| 21220 |
+
{
|
| 21221 |
+
"epoch": 0.0055816341908584,
|
| 21222 |
+
"grad_norm": 0.965479850769043,
|
| 21223 |
+
"learning_rate": 7.110004567809986e-06,
|
| 21224 |
+
"loss": 1.9043,
|
| 21225 |
+
"step": 272000
|
| 21226 |
+
},
|
| 21227 |
+
{
|
| 21228 |
+
"epoch": 0.0055816341908584,
|
| 21229 |
+
"eval_loss": 2.1842684745788574,
|
| 21230 |
+
"eval_runtime": 51.7157,
|
| 21231 |
+
"eval_samples_per_second": 197.116,
|
| 21232 |
+
"eval_steps_per_second": 1.547,
|
| 21233 |
+
"step": 272000
|
| 21234 |
+
},
|
| 21235 |
+
{
|
| 21236 |
+
"epoch": 0.005860715900401319,
|
| 21237 |
+
"grad_norm": 1.0330137014389038,
|
| 21238 |
+
"learning_rate": 7.094546226916513e-06,
|
| 21239 |
+
"loss": 1.9144,
|
| 21240 |
+
"step": 272100
|
| 21241 |
+
},
|
| 21242 |
+
{
|
| 21243 |
+
"epoch": 0.00613979760994424,
|
| 21244 |
+
"grad_norm": 0.9688111543655396,
|
| 21245 |
+
"learning_rate": 7.079101929654261e-06,
|
| 21246 |
+
"loss": 1.9102,
|
| 21247 |
+
"step": 272200
|
| 21248 |
+
},
|
| 21249 |
+
{
|
| 21250 |
+
"epoch": 0.0064188793194871595,
|
| 21251 |
+
"grad_norm": 0.9989941120147705,
|
| 21252 |
+
"learning_rate": 7.06367168813653e-06,
|
| 21253 |
+
"loss": 1.9074,
|
| 21254 |
+
"step": 272300
|
| 21255 |
+
},
|
| 21256 |
+
{
|
| 21257 |
+
"epoch": 0.006697961029030079,
|
| 21258 |
+
"grad_norm": 1.0278581380844116,
|
| 21259 |
+
"learning_rate": 7.048255514465577e-06,
|
| 21260 |
+
"loss": 1.8924,
|
| 21261 |
+
"step": 272400
|
| 21262 |
+
},
|
| 21263 |
+
{
|
| 21264 |
+
"epoch": 0.006977042738573,
|
| 21265 |
+
"grad_norm": 0.9955400228500366,
|
| 21266 |
+
"learning_rate": 7.032853420732644e-06,
|
| 21267 |
+
"loss": 1.8814,
|
| 21268 |
+
"step": 272500
|
| 21269 |
+
},
|
| 21270 |
+
{
|
| 21271 |
+
"epoch": 0.0072561244481159195,
|
| 21272 |
+
"grad_norm": 0.9963505864143372,
|
| 21273 |
+
"learning_rate": 7.017465419017921e-06,
|
| 21274 |
+
"loss": 1.8934,
|
| 21275 |
+
"step": 272600
|
| 21276 |
+
},
|
| 21277 |
+
{
|
| 21278 |
+
"epoch": 0.007535206157658839,
|
| 21279 |
+
"grad_norm": 1.0569164752960205,
|
| 21280 |
+
"learning_rate": 7.002091521390555e-06,
|
| 21281 |
+
"loss": 1.8939,
|
| 21282 |
+
"step": 272700
|
| 21283 |
+
},
|
| 21284 |
+
{
|
| 21285 |
+
"epoch": 0.00781428786720176,
|
| 21286 |
+
"grad_norm": 0.9949243068695068,
|
| 21287 |
+
"learning_rate": 6.986731739908611e-06,
|
| 21288 |
+
"loss": 1.9021,
|
| 21289 |
+
"step": 272800
|
| 21290 |
+
},
|
| 21291 |
+
{
|
| 21292 |
+
"epoch": 0.00809336957674468,
|
| 21293 |
+
"grad_norm": 1.0075616836547852,
|
| 21294 |
+
"learning_rate": 6.971386086619103e-06,
|
| 21295 |
+
"loss": 1.8978,
|
| 21296 |
+
"step": 272900
|
| 21297 |
+
},
|
| 21298 |
+
{
|
| 21299 |
+
"epoch": 0.008372451286287599,
|
| 21300 |
+
"grad_norm": 0.9863401651382446,
|
| 21301 |
+
"learning_rate": 6.9560545735579606e-06,
|
| 21302 |
+
"loss": 1.9168,
|
| 21303 |
+
"step": 273000
|
| 21304 |
+
},
|
| 21305 |
+
{
|
| 21306 |
+
"epoch": 0.008372451286287599,
|
| 21307 |
+
"eval_loss": 2.190558433532715,
|
| 21308 |
+
"eval_runtime": 51.5702,
|
| 21309 |
+
"eval_samples_per_second": 197.672,
|
| 21310 |
+
"eval_steps_per_second": 1.551,
|
| 21311 |
+
"step": 273000
|
| 21312 |
+
},
|
| 21313 |
+
{
|
| 21314 |
+
"epoch": 0.008651532995830519,
|
| 21315 |
+
"grad_norm": 0.9959931969642639,
|
| 21316 |
+
"learning_rate": 6.940737212750012e-06,
|
| 21317 |
+
"loss": 1.8908,
|
| 21318 |
+
"step": 273100
|
| 21319 |
+
},
|
| 21320 |
+
{
|
| 21321 |
+
"epoch": 0.008930614705373438,
|
| 21322 |
+
"grad_norm": 1.0437434911727905,
|
| 21323 |
+
"learning_rate": 6.9254340162089846e-06,
|
| 21324 |
+
"loss": 1.892,
|
| 21325 |
+
"step": 273200
|
| 21326 |
+
},
|
| 21327 |
+
{
|
| 21328 |
+
"epoch": 0.00920969641491636,
|
| 21329 |
+
"grad_norm": 0.9680078625679016,
|
| 21330 |
+
"learning_rate": 6.91014499593751e-06,
|
| 21331 |
+
"loss": 1.8859,
|
| 21332 |
+
"step": 273300
|
| 21333 |
+
},
|
| 21334 |
+
{
|
| 21335 |
+
"epoch": 0.00948877812445928,
|
| 21336 |
+
"grad_norm": 0.9896761775016785,
|
| 21337 |
+
"learning_rate": 6.894870163927095e-06,
|
| 21338 |
+
"loss": 1.8885,
|
| 21339 |
+
"step": 273400
|
| 21340 |
+
},
|
| 21341 |
+
{
|
| 21342 |
+
"epoch": 0.0097678598340022,
|
| 21343 |
+
"grad_norm": 1.0668361186981201,
|
| 21344 |
+
"learning_rate": 6.879609532158124e-06,
|
| 21345 |
+
"loss": 1.9031,
|
| 21346 |
+
"step": 273500
|
| 21347 |
+
},
|
| 21348 |
+
{
|
| 21349 |
+
"epoch": 0.010046941543545119,
|
| 21350 |
+
"grad_norm": 0.9838683605194092,
|
| 21351 |
+
"learning_rate": 6.864363112599823e-06,
|
| 21352 |
+
"loss": 1.9065,
|
| 21353 |
+
"step": 273600
|
| 21354 |
+
},
|
| 21355 |
+
{
|
| 21356 |
+
"epoch": 0.010326023253088039,
|
| 21357 |
+
"grad_norm": 1.0146870613098145,
|
| 21358 |
+
"learning_rate": 6.849130917210295e-06,
|
| 21359 |
+
"loss": 1.8873,
|
| 21360 |
+
"step": 273700
|
| 21361 |
+
},
|
| 21362 |
+
{
|
| 21363 |
+
"epoch": 0.01060510496263096,
|
| 21364 |
+
"grad_norm": 0.959338366985321,
|
| 21365 |
+
"learning_rate": 6.833912957936478e-06,
|
| 21366 |
+
"loss": 1.8851,
|
| 21367 |
+
"step": 273800
|
| 21368 |
+
},
|
| 21369 |
+
{
|
| 21370 |
+
"epoch": 0.01088418667217388,
|
| 21371 |
+
"grad_norm": 1.032836675643921,
|
| 21372 |
+
"learning_rate": 6.818709246714147e-06,
|
| 21373 |
+
"loss": 1.8971,
|
| 21374 |
+
"step": 273900
|
| 21375 |
+
},
|
| 21376 |
+
{
|
| 21377 |
+
"epoch": 0.0111632683817168,
|
| 21378 |
+
"grad_norm": 0.9915603399276733,
|
| 21379 |
+
"learning_rate": 6.803519795467888e-06,
|
| 21380 |
+
"loss": 1.8906,
|
| 21381 |
+
"step": 274000
|
| 21382 |
+
},
|
| 21383 |
+
{
|
| 21384 |
+
"epoch": 0.0111632683817168,
|
| 21385 |
+
"eval_loss": 2.1938321590423584,
|
| 21386 |
+
"eval_runtime": 51.6041,
|
| 21387 |
+
"eval_samples_per_second": 197.543,
|
| 21388 |
+
"eval_steps_per_second": 1.55,
|
| 21389 |
+
"step": 274000
|
| 21390 |
+
},
|
| 21391 |
+
{
|
| 21392 |
+
"epoch": 0.011442350091259719,
|
| 21393 |
+
"grad_norm": 0.9873210787773132,
|
| 21394 |
+
"learning_rate": 6.788344616111117e-06,
|
| 21395 |
+
"loss": 1.879,
|
| 21396 |
+
"step": 274100
|
| 21397 |
+
},
|
| 21398 |
+
{
|
| 21399 |
+
"epoch": 0.011721431800802639,
|
| 21400 |
+
"grad_norm": 0.9958903193473816,
|
| 21401 |
+
"learning_rate": 6.773183720546056e-06,
|
| 21402 |
+
"loss": 1.8915,
|
| 21403 |
+
"step": 274200
|
| 21404 |
+
},
|
| 21405 |
+
{
|
| 21406 |
+
"epoch": 0.012000513510345558,
|
| 21407 |
+
"grad_norm": 0.9812116026878357,
|
| 21408 |
+
"learning_rate": 6.758037120663727e-06,
|
| 21409 |
+
"loss": 1.8922,
|
| 21410 |
+
"step": 274300
|
| 21411 |
+
},
|
| 21412 |
+
{
|
| 21413 |
+
"epoch": 0.01227959521988848,
|
| 21414 |
+
"grad_norm": 1.0199834108352661,
|
| 21415 |
+
"learning_rate": 6.742904828343921e-06,
|
| 21416 |
+
"loss": 1.8928,
|
| 21417 |
+
"step": 274400
|
| 21418 |
+
},
|
| 21419 |
+
{
|
| 21420 |
+
"epoch": 0.0125586769294314,
|
| 21421 |
+
"grad_norm": 0.9892932772636414,
|
| 21422 |
+
"learning_rate": 6.727786855455218e-06,
|
| 21423 |
+
"loss": 1.8689,
|
| 21424 |
+
"step": 274500
|
| 21425 |
+
},
|
| 21426 |
+
{
|
| 21427 |
+
"epoch": 0.012837758638974319,
|
| 21428 |
+
"grad_norm": 0.9794331789016724,
|
| 21429 |
+
"learning_rate": 6.712683213854973e-06,
|
| 21430 |
+
"loss": 1.8766,
|
| 21431 |
+
"step": 274600
|
| 21432 |
+
},
|
| 21433 |
+
{
|
| 21434 |
+
"epoch": 0.013116840348517239,
|
| 21435 |
+
"grad_norm": 0.9654126763343811,
|
| 21436 |
+
"learning_rate": 6.697593915389297e-06,
|
| 21437 |
+
"loss": 1.8887,
|
| 21438 |
+
"step": 274700
|
| 21439 |
+
},
|
| 21440 |
+
{
|
| 21441 |
+
"epoch": 0.013395922058060158,
|
| 21442 |
+
"grad_norm": 0.9861681461334229,
|
| 21443 |
+
"learning_rate": 6.682518971893053e-06,
|
| 21444 |
+
"loss": 1.8936,
|
| 21445 |
+
"step": 274800
|
| 21446 |
+
},
|
| 21447 |
+
{
|
| 21448 |
+
"epoch": 0.013675003767603078,
|
| 21449 |
+
"grad_norm": 1.0138262510299683,
|
| 21450 |
+
"learning_rate": 6.667458395189835e-06,
|
| 21451 |
+
"loss": 1.8718,
|
| 21452 |
+
"step": 274900
|
| 21453 |
+
},
|
| 21454 |
+
{
|
| 21455 |
+
"epoch": 0.013954085477146,
|
| 21456 |
+
"grad_norm": 0.9910663962364197,
|
| 21457 |
+
"learning_rate": 6.652412197091979e-06,
|
| 21458 |
+
"loss": 1.8931,
|
| 21459 |
+
"step": 275000
|
| 21460 |
+
},
|
| 21461 |
+
{
|
| 21462 |
+
"epoch": 0.013954085477146,
|
| 21463 |
+
"eval_loss": 2.1973860263824463,
|
| 21464 |
+
"eval_runtime": 51.7316,
|
| 21465 |
+
"eval_samples_per_second": 197.056,
|
| 21466 |
+
"eval_steps_per_second": 1.546,
|
| 21467 |
+
"step": 275000
|
| 21468 |
+
},
|
| 21469 |
+
{
|
| 21470 |
+
"epoch": 0.01423316718668892,
|
| 21471 |
+
"grad_norm": 0.9887643456459045,
|
| 21472 |
+
"learning_rate": 6.637380389400538e-06,
|
| 21473 |
+
"loss": 1.8915,
|
| 21474 |
+
"step": 275100
|
| 21475 |
+
},
|
| 21476 |
+
{
|
| 21477 |
+
"epoch": 0.014512248896231839,
|
| 21478 |
+
"grad_norm": 1.0442452430725098,
|
| 21479 |
+
"learning_rate": 6.622362983905295e-06,
|
| 21480 |
+
"loss": 1.8866,
|
| 21481 |
+
"step": 275200
|
| 21482 |
+
},
|
| 21483 |
+
{
|
| 21484 |
+
"epoch": 0.014791330605774759,
|
| 21485 |
+
"grad_norm": 1.025341272354126,
|
| 21486 |
+
"learning_rate": 6.607359992384704e-06,
|
| 21487 |
+
"loss": 1.8727,
|
| 21488 |
+
"step": 275300
|
| 21489 |
+
},
|
| 21490 |
+
{
|
| 21491 |
+
"epoch": 0.015070412315317678,
|
| 21492 |
+
"grad_norm": 1.0826934576034546,
|
| 21493 |
+
"learning_rate": 6.592371426605942e-06,
|
| 21494 |
+
"loss": 1.878,
|
| 21495 |
+
"step": 275400
|
| 21496 |
+
},
|
| 21497 |
+
{
|
| 21498 |
+
"epoch": 0.015349494024860598,
|
| 21499 |
+
"grad_norm": 0.9907537698745728,
|
| 21500 |
+
"learning_rate": 6.5773972983248635e-06,
|
| 21501 |
+
"loss": 1.8876,
|
| 21502 |
+
"step": 275500
|
| 21503 |
+
},
|
| 21504 |
+
{
|
| 21505 |
+
"epoch": 0.01562857573440352,
|
| 21506 |
+
"grad_norm": 1.0108195543289185,
|
| 21507 |
+
"learning_rate": 6.562437619286002e-06,
|
| 21508 |
+
"loss": 1.8791,
|
| 21509 |
+
"step": 275600
|
| 21510 |
+
},
|
| 21511 |
+
{
|
| 21512 |
+
"epoch": 0.015907657443946437,
|
| 21513 |
+
"grad_norm": 0.9989004731178284,
|
| 21514 |
+
"learning_rate": 6.547492401222549e-06,
|
| 21515 |
+
"loss": 1.8747,
|
| 21516 |
+
"step": 275700
|
| 21517 |
+
},
|
| 21518 |
+
{
|
| 21519 |
+
"epoch": 0.01618673915348936,
|
| 21520 |
+
"grad_norm": 1.0045630931854248,
|
| 21521 |
+
"learning_rate": 6.532561655856351e-06,
|
| 21522 |
+
"loss": 1.8863,
|
| 21523 |
+
"step": 275800
|
| 21524 |
+
},
|
| 21525 |
+
{
|
| 21526 |
+
"epoch": 0.01646582086303228,
|
| 21527 |
+
"grad_norm": 0.9753278493881226,
|
| 21528 |
+
"learning_rate": 6.517645394897923e-06,
|
| 21529 |
+
"loss": 1.8804,
|
| 21530 |
+
"step": 275900
|
| 21531 |
+
},
|
| 21532 |
+
{
|
| 21533 |
+
"epoch": 0.016744902572575198,
|
| 21534 |
+
"grad_norm": 0.9882794618606567,
|
| 21535 |
+
"learning_rate": 6.5027436300464095e-06,
|
| 21536 |
+
"loss": 1.8751,
|
| 21537 |
+
"step": 276000
|
| 21538 |
+
},
|
| 21539 |
+
{
|
| 21540 |
+
"epoch": 0.016744902572575198,
|
| 21541 |
+
"eval_loss": 2.195430040359497,
|
| 21542 |
+
"eval_runtime": 51.7195,
|
| 21543 |
+
"eval_samples_per_second": 197.102,
|
| 21544 |
+
"eval_steps_per_second": 1.547,
|
| 21545 |
+
"step": 276000
|
| 21546 |
+
},
|
| 21547 |
+
{
|
| 21548 |
+
"epoch": 0.01702398428211812,
|
| 21549 |
+
"grad_norm": 1.0431910753250122,
|
| 21550 |
+
"learning_rate": 6.487856372989573e-06,
|
| 21551 |
+
"loss": 1.8739,
|
| 21552 |
+
"step": 276100
|
| 21553 |
+
},
|
| 21554 |
+
{
|
| 21555 |
+
"epoch": 0.017303065991661037,
|
| 21556 |
+
"grad_norm": 1.0198723077774048,
|
| 21557 |
+
"learning_rate": 6.472983635403818e-06,
|
| 21558 |
+
"loss": 1.8622,
|
| 21559 |
+
"step": 276200
|
| 21560 |
+
},
|
| 21561 |
+
{
|
| 21562 |
+
"epoch": 0.01758214770120396,
|
| 21563 |
+
"grad_norm": 1.0333479642868042,
|
| 21564 |
+
"learning_rate": 6.458125428954146e-06,
|
| 21565 |
+
"loss": 1.871,
|
| 21566 |
+
"step": 276300
|
| 21567 |
+
},
|
| 21568 |
+
{
|
| 21569 |
+
"epoch": 0.017861229410746877,
|
| 21570 |
+
"grad_norm": 0.9855126738548279,
|
| 21571 |
+
"learning_rate": 6.443281765294177e-06,
|
| 21572 |
+
"loss": 1.8632,
|
| 21573 |
+
"step": 276400
|
| 21574 |
+
},
|
| 21575 |
+
{
|
| 21576 |
+
"epoch": 0.018140311120289798,
|
| 21577 |
+
"grad_norm": 1.05318284034729,
|
| 21578 |
+
"learning_rate": 6.4284526560661005e-06,
|
| 21579 |
+
"loss": 1.8804,
|
| 21580 |
+
"step": 276500
|
| 21581 |
+
},
|
| 21582 |
+
{
|
| 21583 |
+
"epoch": 0.01841939282983272,
|
| 21584 |
+
"grad_norm": 1.0296765565872192,
|
| 21585 |
+
"learning_rate": 6.41363811290071e-06,
|
| 21586 |
+
"loss": 1.8752,
|
| 21587 |
+
"step": 276600
|
| 21588 |
+
},
|
| 21589 |
+
{
|
| 21590 |
+
"epoch": 0.018698474539375638,
|
| 21591 |
+
"grad_norm": 1.0334811210632324,
|
| 21592 |
+
"learning_rate": 6.398838147417374e-06,
|
| 21593 |
+
"loss": 1.8768,
|
| 21594 |
+
"step": 276700
|
| 21595 |
+
},
|
| 21596 |
+
{
|
| 21597 |
+
"epoch": 0.01897755624891856,
|
| 21598 |
+
"grad_norm": 0.9788868427276611,
|
| 21599 |
+
"learning_rate": 6.384052771224022e-06,
|
| 21600 |
+
"loss": 1.867,
|
| 21601 |
+
"step": 276800
|
| 21602 |
+
},
|
| 21603 |
+
{
|
| 21604 |
+
"epoch": 0.019256637958461477,
|
| 21605 |
+
"grad_norm": 1.0330471992492676,
|
| 21606 |
+
"learning_rate": 6.369281995917134e-06,
|
| 21607 |
+
"loss": 1.8668,
|
| 21608 |
+
"step": 276900
|
| 21609 |
+
},
|
| 21610 |
+
{
|
| 21611 |
+
"epoch": 0.0195357196680044,
|
| 21612 |
+
"grad_norm": 0.9711721539497375,
|
| 21613 |
+
"learning_rate": 6.354525833081759e-06,
|
| 21614 |
+
"loss": 1.8703,
|
| 21615 |
+
"step": 277000
|
| 21616 |
+
},
|
| 21617 |
+
{
|
| 21618 |
+
"epoch": 0.0195357196680044,
|
| 21619 |
+
"eval_loss": 2.19480037689209,
|
| 21620 |
+
"eval_runtime": 51.7555,
|
| 21621 |
+
"eval_samples_per_second": 196.964,
|
| 21622 |
+
"eval_steps_per_second": 1.546,
|
| 21623 |
+
"step": 277000
|
| 21624 |
+
},
|
| 21625 |
+
{
|
| 21626 |
+
"epoch": 0.01981480137754732,
|
| 21627 |
+
"grad_norm": 1.0206748247146606,
|
| 21628 |
+
"learning_rate": 6.339784294291454e-06,
|
| 21629 |
+
"loss": 1.8639,
|
| 21630 |
+
"step": 277100
|
| 21631 |
+
},
|
| 21632 |
+
{
|
| 21633 |
+
"epoch": 0.020093883087090238,
|
| 21634 |
+
"grad_norm": 1.019838571548462,
|
| 21635 |
+
"learning_rate": 6.325057391108341e-06,
|
| 21636 |
+
"loss": 1.8703,
|
| 21637 |
+
"step": 277200
|
| 21638 |
+
},
|
| 21639 |
+
{
|
| 21640 |
+
"epoch": 0.02037296479663316,
|
| 21641 |
+
"grad_norm": 0.9485549330711365,
|
| 21642 |
+
"learning_rate": 6.3103451350830316e-06,
|
| 21643 |
+
"loss": 1.8753,
|
| 21644 |
+
"step": 277300
|
| 21645 |
+
},
|
| 21646 |
+
{
|
| 21647 |
+
"epoch": 0.020652046506176077,
|
| 21648 |
+
"grad_norm": 0.9893754124641418,
|
| 21649 |
+
"learning_rate": 6.295647537754668e-06,
|
| 21650 |
+
"loss": 1.8808,
|
| 21651 |
+
"step": 277400
|
| 21652 |
+
},
|
| 21653 |
+
{
|
| 21654 |
+
"epoch": 0.020931128215719,
|
| 21655 |
+
"grad_norm": 0.9906275868415833,
|
| 21656 |
+
"learning_rate": 6.280964610650894e-06,
|
| 21657 |
+
"loss": 1.875,
|
| 21658 |
+
"step": 277500
|
| 21659 |
+
},
|
| 21660 |
+
{
|
| 21661 |
+
"epoch": 0.02121020992526192,
|
| 21662 |
+
"grad_norm": 1.0166252851486206,
|
| 21663 |
+
"learning_rate": 6.266296365287844e-06,
|
| 21664 |
+
"loss": 1.8808,
|
| 21665 |
+
"step": 277600
|
| 21666 |
+
},
|
| 21667 |
+
{
|
| 21668 |
+
"epoch": 0.021489291634804838,
|
| 21669 |
+
"grad_norm": 1.0124883651733398,
|
| 21670 |
+
"learning_rate": 6.251642813170142e-06,
|
| 21671 |
+
"loss": 1.8795,
|
| 21672 |
+
"step": 277700
|
| 21673 |
+
},
|
| 21674 |
+
{
|
| 21675 |
+
"epoch": 0.02176837334434776,
|
| 21676 |
+
"grad_norm": 1.0064804553985596,
|
| 21677 |
+
"learning_rate": 6.237003965790872e-06,
|
| 21678 |
+
"loss": 1.8692,
|
| 21679 |
+
"step": 277800
|
| 21680 |
+
},
|
| 21681 |
+
{
|
| 21682 |
+
"epoch": 0.022047455053890677,
|
| 21683 |
+
"grad_norm": 1.0103724002838135,
|
| 21684 |
+
"learning_rate": 6.222379834631598e-06,
|
| 21685 |
+
"loss": 1.8863,
|
| 21686 |
+
"step": 277900
|
| 21687 |
+
},
|
| 21688 |
+
{
|
| 21689 |
+
"epoch": 0.0223265367634336,
|
| 21690 |
+
"grad_norm": 1.0621378421783447,
|
| 21691 |
+
"learning_rate": 6.207770431162343e-06,
|
| 21692 |
+
"loss": 1.8616,
|
| 21693 |
+
"step": 278000
|
| 21694 |
+
},
|
| 21695 |
+
{
|
| 21696 |
+
"epoch": 0.0223265367634336,
|
| 21697 |
+
"eval_loss": 2.2066683769226074,
|
| 21698 |
+
"eval_runtime": 51.8686,
|
| 21699 |
+
"eval_samples_per_second": 196.535,
|
| 21700 |
+
"eval_steps_per_second": 1.542,
|
| 21701 |
+
"step": 278000
|
| 21702 |
+
},
|
| 21703 |
+
{
|
| 21704 |
+
"epoch": 0.022605618472976517,
|
| 21705 |
+
"grad_norm": 1.0321729183197021,
|
| 21706 |
+
"learning_rate": 6.1931757668415855e-06,
|
| 21707 |
+
"loss": 1.8622,
|
| 21708 |
+
"step": 278100
|
| 21709 |
+
},
|
| 21710 |
+
{
|
| 21711 |
+
"epoch": 0.022884700182519438,
|
| 21712 |
+
"grad_norm": 1.0194209814071655,
|
| 21713 |
+
"learning_rate": 6.178595853116212e-06,
|
| 21714 |
+
"loss": 1.8701,
|
| 21715 |
+
"step": 278200
|
| 21716 |
+
},
|
| 21717 |
+
{
|
| 21718 |
+
"epoch": 0.02316378189206236,
|
| 21719 |
+
"grad_norm": 1.0299484729766846,
|
| 21720 |
+
"learning_rate": 6.164030701421583e-06,
|
| 21721 |
+
"loss": 1.8809,
|
| 21722 |
+
"step": 278300
|
| 21723 |
+
},
|
| 21724 |
+
{
|
| 21725 |
+
"epoch": 0.023442863601605277,
|
| 21726 |
+
"grad_norm": 1.0566426515579224,
|
| 21727 |
+
"learning_rate": 6.149480323181439e-06,
|
| 21728 |
+
"loss": 1.871,
|
| 21729 |
+
"step": 278400
|
| 21730 |
+
},
|
| 21731 |
+
{
|
| 21732 |
+
"epoch": 0.0237219453111482,
|
| 21733 |
+
"grad_norm": 1.0435720682144165,
|
| 21734 |
+
"learning_rate": 6.134944729807971e-06,
|
| 21735 |
+
"loss": 1.8587,
|
| 21736 |
+
"step": 278500
|
| 21737 |
+
},
|
| 21738 |
+
{
|
| 21739 |
+
"epoch": 0.024001027020691117,
|
| 21740 |
+
"grad_norm": 0.9985933303833008,
|
| 21741 |
+
"learning_rate": 6.120423932701741e-06,
|
| 21742 |
+
"loss": 1.8571,
|
| 21743 |
+
"step": 278600
|
| 21744 |
+
},
|
| 21745 |
+
{
|
| 21746 |
+
"epoch": 0.024280108730234038,
|
| 21747 |
+
"grad_norm": 1.0292407274246216,
|
| 21748 |
+
"learning_rate": 6.1059179432517295e-06,
|
| 21749 |
+
"loss": 1.8612,
|
| 21750 |
+
"step": 278700
|
| 21751 |
+
},
|
| 21752 |
+
{
|
| 21753 |
+
"epoch": 0.02455919043977696,
|
| 21754 |
+
"grad_norm": 1.0021073818206787,
|
| 21755 |
+
"learning_rate": 6.091426772835293e-06,
|
| 21756 |
+
"loss": 1.8695,
|
| 21757 |
+
"step": 278800
|
| 21758 |
+
},
|
| 21759 |
+
{
|
| 21760 |
+
"epoch": 0.024838272149319877,
|
| 21761 |
+
"grad_norm": 1.0160890817642212,
|
| 21762 |
+
"learning_rate": 6.076950432818176e-06,
|
| 21763 |
+
"loss": 1.8865,
|
| 21764 |
+
"step": 278900
|
| 21765 |
+
},
|
| 21766 |
+
{
|
| 21767 |
+
"epoch": 0.0251173538588628,
|
| 21768 |
+
"grad_norm": 1.002803921699524,
|
| 21769 |
+
"learning_rate": 6.062488934554469e-06,
|
| 21770 |
+
"loss": 1.8657,
|
| 21771 |
+
"step": 279000
|
| 21772 |
+
},
|
| 21773 |
+
{
|
| 21774 |
+
"epoch": 0.0251173538588628,
|
| 21775 |
+
"eval_loss": 2.2028017044067383,
|
| 21776 |
+
"eval_runtime": 51.9876,
|
| 21777 |
+
"eval_samples_per_second": 196.085,
|
| 21778 |
+
"eval_steps_per_second": 1.539,
|
| 21779 |
+
"step": 279000
|
| 21780 |
+
},
|
| 21781 |
+
{
|
| 21782 |
+
"epoch": 0.025396435568405717,
|
| 21783 |
+
"grad_norm": 1.0246683359146118,
|
| 21784 |
+
"learning_rate": 6.048042289386643e-06,
|
| 21785 |
+
"loss": 1.8605,
|
| 21786 |
+
"step": 279100
|
| 21787 |
+
},
|
| 21788 |
+
{
|
| 21789 |
+
"epoch": 0.025675517277948638,
|
| 21790 |
+
"grad_norm": 1.0214561223983765,
|
| 21791 |
+
"learning_rate": 6.033610508645507e-06,
|
| 21792 |
+
"loss": 1.869,
|
| 21793 |
+
"step": 279200
|
| 21794 |
+
},
|
| 21795 |
+
{
|
| 21796 |
+
"epoch": 0.025954598987491556,
|
| 21797 |
+
"grad_norm": 1.0367177724838257,
|
| 21798 |
+
"learning_rate": 6.019193603650225e-06,
|
| 21799 |
+
"loss": 1.8564,
|
| 21800 |
+
"step": 279300
|
| 21801 |
+
},
|
| 21802 |
+
{
|
| 21803 |
+
"epoch": 0.026233680697034478,
|
| 21804 |
+
"grad_norm": 1.0507416725158691,
|
| 21805 |
+
"learning_rate": 6.004791585708272e-06,
|
| 21806 |
+
"loss": 1.8819,
|
| 21807 |
+
"step": 279400
|
| 21808 |
+
},
|
| 21809 |
+
{
|
| 21810 |
+
"epoch": 0.0265127624065774,
|
| 21811 |
+
"grad_norm": 1.0242899656295776,
|
| 21812 |
+
"learning_rate": 5.990404466115465e-06,
|
| 21813 |
+
"loss": 1.8804,
|
| 21814 |
+
"step": 279500
|
| 21815 |
+
},
|
| 21816 |
+
{
|
| 21817 |
+
"epoch": 0.026791844116120317,
|
| 21818 |
+
"grad_norm": 1.0185377597808838,
|
| 21819 |
+
"learning_rate": 5.976032256155939e-06,
|
| 21820 |
+
"loss": 1.873,
|
| 21821 |
+
"step": 279600
|
| 21822 |
+
},
|
| 21823 |
+
{
|
| 21824 |
+
"epoch": 0.02707092582566324,
|
| 21825 |
+
"grad_norm": 1.0047358274459839,
|
| 21826 |
+
"learning_rate": 5.961674967102113e-06,
|
| 21827 |
+
"loss": 1.8726,
|
| 21828 |
+
"step": 279700
|
| 21829 |
+
},
|
| 21830 |
+
{
|
| 21831 |
+
"epoch": 0.027350007535206156,
|
| 21832 |
+
"grad_norm": 1.0124741792678833,
|
| 21833 |
+
"learning_rate": 5.9473326102147255e-06,
|
| 21834 |
+
"loss": 1.8711,
|
| 21835 |
+
"step": 279800
|
| 21836 |
+
},
|
| 21837 |
+
{
|
| 21838 |
+
"epoch": 0.027629089244749078,
|
| 21839 |
+
"grad_norm": 1.0482338666915894,
|
| 21840 |
+
"learning_rate": 5.933005196742783e-06,
|
| 21841 |
+
"loss": 1.8599,
|
| 21842 |
+
"step": 279900
|
| 21843 |
+
},
|
| 21844 |
+
{
|
| 21845 |
+
"epoch": 0.027908170954292,
|
| 21846 |
+
"grad_norm": 0.9833710193634033,
|
| 21847 |
+
"learning_rate": 5.918692737923592e-06,
|
| 21848 |
+
"loss": 1.8488,
|
| 21849 |
+
"step": 280000
|
| 21850 |
+
},
|
| 21851 |
+
{
|
| 21852 |
+
"epoch": 0.027908170954292,
|
| 21853 |
+
"eval_loss": 2.2023353576660156,
|
| 21854 |
+
"eval_runtime": 51.8909,
|
| 21855 |
+
"eval_samples_per_second": 196.451,
|
| 21856 |
+
"eval_steps_per_second": 1.542,
|
| 21857 |
+
"step": 280000
|
| 21858 |
+
},
|
| 21859 |
+
{
|
| 21860 |
+
"epoch": 0.028187252663834917,
|
| 21861 |
+
"grad_norm": 1.0626851320266724,
|
| 21862 |
+
"learning_rate": 5.9043952449827275e-06,
|
| 21863 |
+
"loss": 1.8484,
|
| 21864 |
+
"step": 280100
|
| 21865 |
+
},
|
| 21866 |
+
{
|
| 21867 |
+
"epoch": 0.02846633437337784,
|
| 21868 |
+
"grad_norm": 1.0383021831512451,
|
| 21869 |
+
"learning_rate": 5.890112729134004e-06,
|
| 21870 |
+
"loss": 1.8728,
|
| 21871 |
+
"step": 280200
|
| 21872 |
+
},
|
| 21873 |
+
{
|
| 21874 |
+
"epoch": 0.028745416082920756,
|
| 21875 |
+
"grad_norm": 1.0421721935272217,
|
| 21876 |
+
"learning_rate": 5.875845201579513e-06,
|
| 21877 |
+
"loss": 1.8676,
|
| 21878 |
+
"step": 280300
|
| 21879 |
+
},
|
| 21880 |
+
{
|
| 21881 |
+
"epoch": 0.029024497792463678,
|
| 21882 |
+
"grad_norm": 1.0611158609390259,
|
| 21883 |
+
"learning_rate": 5.861592673509581e-06,
|
| 21884 |
+
"loss": 1.8549,
|
| 21885 |
+
"step": 280400
|
| 21886 |
+
},
|
| 21887 |
+
{
|
| 21888 |
+
"epoch": 0.0293035795020066,
|
| 21889 |
+
"grad_norm": 1.0284640789031982,
|
| 21890 |
+
"learning_rate": 5.847355156102771e-06,
|
| 21891 |
+
"loss": 1.8523,
|
| 21892 |
+
"step": 280500
|
| 21893 |
+
},
|
| 21894 |
+
{
|
| 21895 |
+
"epoch": 0.029582661211549517,
|
| 21896 |
+
"grad_norm": 0.9916824698448181,
|
| 21897 |
+
"learning_rate": 5.833132660525883e-06,
|
| 21898 |
+
"loss": 1.864,
|
| 21899 |
+
"step": 280600
|
| 21900 |
+
},
|
| 21901 |
+
{
|
| 21902 |
+
"epoch": 0.02986174292109244,
|
| 21903 |
+
"grad_norm": 1.0592784881591797,
|
| 21904 |
+
"learning_rate": 5.818925197933911e-06,
|
| 21905 |
+
"loss": 1.8686,
|
| 21906 |
+
"step": 280700
|
| 21907 |
+
},
|
| 21908 |
+
{
|
| 21909 |
+
"epoch": 0.030140824630635357,
|
| 21910 |
+
"grad_norm": 1.014319896697998,
|
| 21911 |
+
"learning_rate": 5.804732779470074e-06,
|
| 21912 |
+
"loss": 1.8572,
|
| 21913 |
+
"step": 280800
|
| 21914 |
+
},
|
| 21915 |
+
{
|
| 21916 |
+
"epoch": 0.030419906340178278,
|
| 21917 |
+
"grad_norm": 1.017314076423645,
|
| 21918 |
+
"learning_rate": 5.7905554162658025e-06,
|
| 21919 |
+
"loss": 1.8666,
|
| 21920 |
+
"step": 280900
|
| 21921 |
+
},
|
| 21922 |
+
{
|
| 21923 |
+
"epoch": 0.030698988049721196,
|
| 21924 |
+
"grad_norm": 1.0180898904800415,
|
| 21925 |
+
"learning_rate": 5.77639311944069e-06,
|
| 21926 |
+
"loss": 1.8735,
|
| 21927 |
+
"step": 281000
|
| 21928 |
+
},
|
| 21929 |
+
{
|
| 21930 |
+
"epoch": 0.030698988049721196,
|
| 21931 |
+
"eval_loss": 2.203965187072754,
|
| 21932 |
+
"eval_runtime": 52.0422,
|
| 21933 |
+
"eval_samples_per_second": 195.879,
|
| 21934 |
+
"eval_steps_per_second": 1.537,
|
| 21935 |
+
"step": 281000
|
| 21936 |
+
},
|
| 21937 |
+
{
|
| 21938 |
+
"epoch": 0.030978069759264117,
|
| 21939 |
+
"grad_norm": 0.9797289371490479,
|
| 21940 |
+
"learning_rate": 5.762245900102545e-06,
|
| 21941 |
+
"loss": 1.8685,
|
| 21942 |
+
"step": 281100
|
| 21943 |
+
},
|
| 21944 |
+
{
|
| 21945 |
+
"epoch": 0.03125715146880704,
|
| 21946 |
+
"grad_norm": 1.0343824625015259,
|
| 21947 |
+
"learning_rate": 5.748113769347319e-06,
|
| 21948 |
+
"loss": 1.836,
|
| 21949 |
+
"step": 281200
|
| 21950 |
+
},
|
| 21951 |
+
{
|
| 21952 |
+
"epoch": 0.03153623317834996,
|
| 21953 |
+
"grad_norm": 1.0038707256317139,
|
| 21954 |
+
"learning_rate": 5.7339967382591534e-06,
|
| 21955 |
+
"loss": 1.8629,
|
| 21956 |
+
"step": 281300
|
| 21957 |
+
},
|
| 21958 |
+
{
|
| 21959 |
+
"epoch": 0.031815314887892875,
|
| 21960 |
+
"grad_norm": 1.0676428079605103,
|
| 21961 |
+
"learning_rate": 5.7198948179103455e-06,
|
| 21962 |
+
"loss": 1.8547,
|
| 21963 |
+
"step": 281400
|
| 21964 |
+
},
|
| 21965 |
+
{
|
| 21966 |
+
"epoch": 0.0320943965974358,
|
| 21967 |
+
"grad_norm": 1.0127052068710327,
|
| 21968 |
+
"learning_rate": 5.70580801936132e-06,
|
| 21969 |
+
"loss": 1.8591,
|
| 21970 |
+
"step": 281500
|
| 21971 |
+
},
|
| 21972 |
+
{
|
| 21973 |
+
"epoch": 0.03237347830697872,
|
| 21974 |
+
"grad_norm": 0.9936082363128662,
|
| 21975 |
+
"learning_rate": 5.6917363536606596e-06,
|
| 21976 |
+
"loss": 1.827,
|
| 21977 |
+
"step": 281600
|
| 21978 |
+
},
|
| 21979 |
+
{
|
| 21980 |
+
"epoch": 0.032652560016521635,
|
| 21981 |
+
"grad_norm": 1.0413488149642944,
|
| 21982 |
+
"learning_rate": 5.6776798318450755e-06,
|
| 21983 |
+
"loss": 1.8518,
|
| 21984 |
+
"step": 281700
|
| 21985 |
+
},
|
| 21986 |
+
{
|
| 21987 |
+
"epoch": 0.03293164172606456,
|
| 21988 |
+
"grad_norm": 1.0689826011657715,
|
| 21989 |
+
"learning_rate": 5.663638464939405e-06,
|
| 21990 |
+
"loss": 1.8469,
|
| 21991 |
+
"step": 281800
|
| 21992 |
+
},
|
| 21993 |
+
{
|
| 21994 |
+
"epoch": 0.03321072343560748,
|
| 21995 |
+
"grad_norm": 1.0071433782577515,
|
| 21996 |
+
"learning_rate": 5.64961226395658e-06,
|
| 21997 |
+
"loss": 1.8499,
|
| 21998 |
+
"step": 281900
|
| 21999 |
+
},
|
| 22000 |
+
{
|
| 22001 |
+
"epoch": 0.033489805145150396,
|
| 22002 |
+
"grad_norm": 1.0158226490020752,
|
| 22003 |
+
"learning_rate": 5.635601239897659e-06,
|
| 22004 |
+
"loss": 1.8408,
|
| 22005 |
+
"step": 282000
|
| 22006 |
+
},
|
| 22007 |
+
{
|
| 22008 |
+
"epoch": 0.033489805145150396,
|
| 22009 |
+
"eval_loss": 2.204533338546753,
|
| 22010 |
+
"eval_runtime": 52.0127,
|
| 22011 |
+
"eval_samples_per_second": 195.991,
|
| 22012 |
+
"eval_steps_per_second": 1.538,
|
| 22013 |
+
"step": 282000
|
| 22014 |
+
},
|
| 22015 |
+
{
|
| 22016 |
+
"epoch": 0.033768886854693314,
|
| 22017 |
+
"grad_norm": 1.0187814235687256,
|
| 22018 |
+
"learning_rate": 5.6216054037517865e-06,
|
| 22019 |
+
"loss": 1.8592,
|
| 22020 |
+
"step": 282100
|
| 22021 |
+
},
|
| 22022 |
+
{
|
| 22023 |
+
"epoch": 0.03404796856423624,
|
| 22024 |
+
"grad_norm": 1.0497030019760132,
|
| 22025 |
+
"learning_rate": 5.607624766496203e-06,
|
| 22026 |
+
"loss": 1.866,
|
| 22027 |
+
"step": 282200
|
| 22028 |
+
},
|
| 22029 |
+
{
|
| 22030 |
+
"epoch": 0.03432705027377916,
|
| 22031 |
+
"grad_norm": 1.0341991186141968,
|
| 22032 |
+
"learning_rate": 5.5936593390962165e-06,
|
| 22033 |
+
"loss": 1.8463,
|
| 22034 |
+
"step": 282300
|
| 22035 |
+
},
|
| 22036 |
+
{
|
| 22037 |
+
"epoch": 0.034606131983322075,
|
| 22038 |
+
"grad_norm": 1.0514994859695435,
|
| 22039 |
+
"learning_rate": 5.579709132505203e-06,
|
| 22040 |
+
"loss": 1.8384,
|
| 22041 |
+
"step": 282400
|
| 22042 |
+
},
|
| 22043 |
+
{
|
| 22044 |
+
"epoch": 0.034885213692865,
|
| 22045 |
+
"grad_norm": 1.0009490251541138,
|
| 22046 |
+
"learning_rate": 5.565774157664616e-06,
|
| 22047 |
+
"loss": 1.8544,
|
| 22048 |
+
"step": 282500
|
| 22049 |
+
},
|
| 22050 |
+
{
|
| 22051 |
+
"epoch": 0.03516429540240792,
|
| 22052 |
+
"grad_norm": 1.033133625984192,
|
| 22053 |
+
"learning_rate": 5.551854425503964e-06,
|
| 22054 |
+
"loss": 1.871,
|
| 22055 |
+
"step": 282600
|
| 22056 |
+
},
|
| 22057 |
+
{
|
| 22058 |
+
"epoch": 0.035443377111950836,
|
| 22059 |
+
"grad_norm": 1.0105431079864502,
|
| 22060 |
+
"learning_rate": 5.537949946940774e-06,
|
| 22061 |
+
"loss": 1.8499,
|
| 22062 |
+
"step": 282700
|
| 22063 |
+
},
|
| 22064 |
+
{
|
| 22065 |
+
"epoch": 0.035722458821493754,
|
| 22066 |
+
"grad_norm": 1.0701624155044556,
|
| 22067 |
+
"learning_rate": 5.524060732880637e-06,
|
| 22068 |
+
"loss": 1.8447,
|
| 22069 |
+
"step": 282800
|
| 22070 |
+
},
|
| 22071 |
+
{
|
| 22072 |
+
"epoch": 0.03600154053103668,
|
| 22073 |
+
"grad_norm": 1.0628591775894165,
|
| 22074 |
+
"learning_rate": 5.510186794217157e-06,
|
| 22075 |
+
"loss": 1.8413,
|
| 22076 |
+
"step": 282900
|
| 22077 |
+
},
|
| 22078 |
+
{
|
| 22079 |
+
"epoch": 0.036280622240579596,
|
| 22080 |
+
"grad_norm": 1.0623475313186646,
|
| 22081 |
+
"learning_rate": 5.4963281418319716e-06,
|
| 22082 |
+
"loss": 1.8549,
|
| 22083 |
+
"step": 283000
|
| 22084 |
+
},
|
| 22085 |
+
{
|
| 22086 |
+
"epoch": 0.036280622240579596,
|
| 22087 |
+
"eval_loss": 2.2079403400421143,
|
| 22088 |
+
"eval_runtime": 52.0695,
|
| 22089 |
+
"eval_samples_per_second": 195.777,
|
| 22090 |
+
"eval_steps_per_second": 1.536,
|
| 22091 |
+
"step": 283000
|
| 22092 |
+
},
|
| 22093 |
+
{
|
| 22094 |
+
"epoch": 0.036559703950122514,
|
| 22095 |
+
"grad_norm": 1.002023696899414,
|
| 22096 |
+
"learning_rate": 5.4824847865947045e-06,
|
| 22097 |
+
"loss": 1.8312,
|
| 22098 |
+
"step": 283100
|
| 22099 |
+
},
|
| 22100 |
+
{
|
| 22101 |
+
"epoch": 0.03683878565966544,
|
| 22102 |
+
"grad_norm": 1.057966947555542,
|
| 22103 |
+
"learning_rate": 5.468656739363004e-06,
|
| 22104 |
+
"loss": 1.8501,
|
| 22105 |
+
"step": 283200
|
| 22106 |
+
},
|
| 22107 |
+
{
|
| 22108 |
+
"epoch": 0.03711786736920836,
|
| 22109 |
+
"grad_norm": 1.0365926027297974,
|
| 22110 |
+
"learning_rate": 5.454844010982504e-06,
|
| 22111 |
+
"loss": 1.861,
|
| 22112 |
+
"step": 283300
|
| 22113 |
+
},
|
| 22114 |
+
{
|
| 22115 |
+
"epoch": 0.037396949078751275,
|
| 22116 |
+
"grad_norm": 0.9884259700775146,
|
| 22117 |
+
"learning_rate": 5.441046612286827e-06,
|
| 22118 |
+
"loss": 1.8495,
|
| 22119 |
+
"step": 283400
|
| 22120 |
+
},
|
| 22121 |
+
{
|
| 22122 |
+
"epoch": 0.0376760307882942,
|
| 22123 |
+
"grad_norm": 1.0611577033996582,
|
| 22124 |
+
"learning_rate": 5.427264554097555e-06,
|
| 22125 |
+
"loss": 1.8521,
|
| 22126 |
+
"step": 283500
|
| 22127 |
+
},
|
| 22128 |
+
{
|
| 22129 |
+
"epoch": 0.03795511249783712,
|
| 22130 |
+
"grad_norm": 1.0047646760940552,
|
| 22131 |
+
"learning_rate": 5.413497847224272e-06,
|
| 22132 |
+
"loss": 1.8497,
|
| 22133 |
+
"step": 283600
|
| 22134 |
+
},
|
| 22135 |
+
{
|
| 22136 |
+
"epoch": 0.038234194207380036,
|
| 22137 |
+
"grad_norm": 1.0214877128601074,
|
| 22138 |
+
"learning_rate": 5.399746502464479e-06,
|
| 22139 |
+
"loss": 1.847,
|
| 22140 |
+
"step": 283700
|
| 22141 |
+
},
|
| 22142 |
+
{
|
| 22143 |
+
"epoch": 0.038513275916922954,
|
| 22144 |
+
"grad_norm": 1.0316286087036133,
|
| 22145 |
+
"learning_rate": 5.386010530603663e-06,
|
| 22146 |
+
"loss": 1.8566,
|
| 22147 |
+
"step": 283800
|
| 22148 |
+
},
|
| 22149 |
+
{
|
| 22150 |
+
"epoch": 0.03879235762646588,
|
| 22151 |
+
"grad_norm": 1.046181559562683,
|
| 22152 |
+
"learning_rate": 5.3722899424152456e-06,
|
| 22153 |
+
"loss": 1.856,
|
| 22154 |
+
"step": 283900
|
| 22155 |
+
},
|
| 22156 |
+
{
|
| 22157 |
+
"epoch": 0.0390714393360088,
|
| 22158 |
+
"grad_norm": 1.1351335048675537,
|
| 22159 |
+
"learning_rate": 5.358584748660567e-06,
|
| 22160 |
+
"loss": 1.857,
|
| 22161 |
+
"step": 284000
|
| 22162 |
+
},
|
| 22163 |
+
{
|
| 22164 |
+
"epoch": 0.0390714393360088,
|
| 22165 |
+
"eval_loss": 2.196443796157837,
|
| 22166 |
+
"eval_runtime": 52.012,
|
| 22167 |
+
"eval_samples_per_second": 195.993,
|
| 22168 |
+
"eval_steps_per_second": 1.538,
|
| 22169 |
+
"step": 284000
|
| 22170 |
+
},
|
| 22171 |
+
{
|
| 22172 |
+
"epoch": 0.039350521045551715,
|
| 22173 |
+
"grad_norm": 1.0209579467773438,
|
| 22174 |
+
"learning_rate": 5.344894960088906e-06,
|
| 22175 |
+
"loss": 1.8367,
|
| 22176 |
+
"step": 284100
|
| 22177 |
+
},
|
| 22178 |
+
{
|
| 22179 |
+
"epoch": 0.03962960275509464,
|
| 22180 |
+
"grad_norm": 0.9978814125061035,
|
| 22181 |
+
"learning_rate": 5.331220587437463e-06,
|
| 22182 |
+
"loss": 1.8627,
|
| 22183 |
+
"step": 284200
|
| 22184 |
+
},
|
| 22185 |
+
{
|
| 22186 |
+
"epoch": 0.03990868446463756,
|
| 22187 |
+
"grad_norm": 1.0793671607971191,
|
| 22188 |
+
"learning_rate": 5.317561641431349e-06,
|
| 22189 |
+
"loss": 1.8504,
|
| 22190 |
+
"step": 284300
|
| 22191 |
+
},
|
| 22192 |
+
{
|
| 22193 |
+
"epoch": 0.040187766174180475,
|
| 22194 |
+
"grad_norm": 0.9899407029151917,
|
| 22195 |
+
"learning_rate": 5.303918132783547e-06,
|
| 22196 |
+
"loss": 1.8294,
|
| 22197 |
+
"step": 284400
|
| 22198 |
+
},
|
| 22199 |
+
{
|
| 22200 |
+
"epoch": 0.04046684788372339,
|
| 22201 |
+
"grad_norm": 1.0489463806152344,
|
| 22202 |
+
"learning_rate": 5.290290072194967e-06,
|
| 22203 |
+
"loss": 1.8507,
|
| 22204 |
+
"step": 284500
|
| 22205 |
+
},
|
| 22206 |
+
{
|
| 22207 |
+
"epoch": 0.04074592959326632,
|
| 22208 |
+
"grad_norm": 1.0309258699417114,
|
| 22209 |
+
"learning_rate": 5.2766774703543855e-06,
|
| 22210 |
+
"loss": 1.8558,
|
| 22211 |
+
"step": 284600
|
| 22212 |
+
},
|
| 22213 |
+
{
|
| 22214 |
+
"epoch": 0.041025011302809236,
|
| 22215 |
+
"grad_norm": 1.0452024936676025,
|
| 22216 |
+
"learning_rate": 5.2630803379384665e-06,
|
| 22217 |
+
"loss": 1.8562,
|
| 22218 |
+
"step": 284700
|
| 22219 |
+
},
|
| 22220 |
+
{
|
| 22221 |
+
"epoch": 0.041304093012352154,
|
| 22222 |
+
"grad_norm": 1.0156536102294922,
|
| 22223 |
+
"learning_rate": 5.24949868561172e-06,
|
| 22224 |
+
"loss": 1.8487,
|
| 22225 |
+
"step": 284800
|
| 22226 |
+
},
|
| 22227 |
+
{
|
| 22228 |
+
"epoch": 0.04158317472189508,
|
| 22229 |
+
"grad_norm": 1.0449084043502808,
|
| 22230 |
+
"learning_rate": 5.2359325240265375e-06,
|
| 22231 |
+
"loss": 1.8601,
|
| 22232 |
+
"step": 284900
|
| 22233 |
+
},
|
| 22234 |
+
{
|
| 22235 |
+
"epoch": 0.041862256431438,
|
| 22236 |
+
"grad_norm": 1.0443174839019775,
|
| 22237 |
+
"learning_rate": 5.222381863823139e-06,
|
| 22238 |
+
"loss": 1.8411,
|
| 22239 |
+
"step": 285000
|
| 22240 |
+
},
|
| 22241 |
+
{
|
| 22242 |
+
"epoch": 0.041862256431438,
|
| 22243 |
+
"eval_loss": 2.207988739013672,
|
| 22244 |
+
"eval_runtime": 51.9411,
|
| 22245 |
+
"eval_samples_per_second": 196.261,
|
| 22246 |
+
"eval_steps_per_second": 1.54,
|
| 22247 |
+
"step": 285000
|
| 22248 |
+
},
|
| 22249 |
+
{
|
| 22250 |
+
"epoch": 0.042141338140980915,
|
| 22251 |
+
"grad_norm": 1.0384715795516968,
|
| 22252 |
+
"learning_rate": 5.208846715629609e-06,
|
| 22253 |
+
"loss": 1.8524,
|
| 22254 |
+
"step": 285100
|
| 22255 |
+
},
|
| 22256 |
+
{
|
| 22257 |
+
"epoch": 0.04242041985052384,
|
| 22258 |
+
"grad_norm": 1.0093611478805542,
|
| 22259 |
+
"learning_rate": 5.195327090061844e-06,
|
| 22260 |
+
"loss": 1.848,
|
| 22261 |
+
"step": 285200
|
| 22262 |
+
},
|
| 22263 |
+
{
|
| 22264 |
+
"epoch": 0.04269950156006676,
|
| 22265 |
+
"grad_norm": 1.0251661539077759,
|
| 22266 |
+
"learning_rate": 5.181822997723582e-06,
|
| 22267 |
+
"loss": 1.8428,
|
| 22268 |
+
"step": 285300
|
| 22269 |
+
},
|
| 22270 |
+
{
|
| 22271 |
+
"epoch": 0.042978583269609676,
|
| 22272 |
+
"grad_norm": 1.056368112564087,
|
| 22273 |
+
"learning_rate": 5.168334449206372e-06,
|
| 22274 |
+
"loss": 1.858,
|
| 22275 |
+
"step": 285400
|
| 22276 |
+
},
|
| 22277 |
+
{
|
| 22278 |
+
"epoch": 0.043257664979152594,
|
| 22279 |
+
"grad_norm": 1.0536158084869385,
|
| 22280 |
+
"learning_rate": 5.154861455089577e-06,
|
| 22281 |
+
"loss": 1.8375,
|
| 22282 |
+
"step": 285500
|
| 22283 |
+
},
|
| 22284 |
+
{
|
| 22285 |
+
"epoch": 0.04353674668869552,
|
| 22286 |
+
"grad_norm": 1.0133367776870728,
|
| 22287 |
+
"learning_rate": 5.141404025940341e-06,
|
| 22288 |
+
"loss": 1.8382,
|
| 22289 |
+
"step": 285600
|
| 22290 |
+
},
|
| 22291 |
+
{
|
| 22292 |
+
"epoch": 0.043815828398238436,
|
| 22293 |
+
"grad_norm": 1.0329740047454834,
|
| 22294 |
+
"learning_rate": 5.127962172313624e-06,
|
| 22295 |
+
"loss": 1.8546,
|
| 22296 |
+
"step": 285700
|
| 22297 |
+
},
|
| 22298 |
+
{
|
| 22299 |
+
"epoch": 0.044094910107781354,
|
| 22300 |
+
"grad_norm": 1.0410008430480957,
|
| 22301 |
+
"learning_rate": 5.114535904752157e-06,
|
| 22302 |
+
"loss": 1.8303,
|
| 22303 |
+
"step": 285800
|
| 22304 |
+
},
|
| 22305 |
+
{
|
| 22306 |
+
"epoch": 0.04437399181732428,
|
| 22307 |
+
"grad_norm": 1.0532476902008057,
|
| 22308 |
+
"learning_rate": 5.1011252337864605e-06,
|
| 22309 |
+
"loss": 1.8418,
|
| 22310 |
+
"step": 285900
|
| 22311 |
+
},
|
| 22312 |
+
{
|
| 22313 |
+
"epoch": 0.0446530735268672,
|
| 22314 |
+
"grad_norm": 1.031091332435608,
|
| 22315 |
+
"learning_rate": 5.087730169934793e-06,
|
| 22316 |
+
"loss": 1.8524,
|
| 22317 |
+
"step": 286000
|
| 22318 |
+
},
|
| 22319 |
+
{
|
| 22320 |
+
"epoch": 0.0446530735268672,
|
| 22321 |
+
"eval_loss": 2.2169249057769775,
|
| 22322 |
+
"eval_runtime": 52.0382,
|
| 22323 |
+
"eval_samples_per_second": 195.895,
|
| 22324 |
+
"eval_steps_per_second": 1.537,
|
| 22325 |
+
"step": 286000
|
| 22326 |
+
},
|
| 22327 |
+
{
|
| 22328 |
+
"epoch": 0.044932155236410115,
|
| 22329 |
+
"grad_norm": 1.0077662467956543,
|
| 22330 |
+
"learning_rate": 5.0743507237032e-06,
|
| 22331 |
+
"loss": 1.8372,
|
| 22332 |
+
"step": 286100
|
| 22333 |
+
},
|
| 22334 |
+
{
|
| 22335 |
+
"epoch": 0.04521123694595303,
|
| 22336 |
+
"grad_norm": 0.9833052754402161,
|
| 22337 |
+
"learning_rate": 5.0609869055854714e-06,
|
| 22338 |
+
"loss": 1.8493,
|
| 22339 |
+
"step": 286200
|
| 22340 |
+
},
|
| 22341 |
+
{
|
| 22342 |
+
"epoch": 0.04549031865549596,
|
| 22343 |
+
"grad_norm": 1.0318917036056519,
|
| 22344 |
+
"learning_rate": 5.047638726063128e-06,
|
| 22345 |
+
"loss": 1.8331,
|
| 22346 |
+
"step": 286300
|
| 22347 |
+
},
|
| 22348 |
+
{
|
| 22349 |
+
"epoch": 0.045769400365038876,
|
| 22350 |
+
"grad_norm": 1.0132189989089966,
|
| 22351 |
+
"learning_rate": 5.03430619560544e-06,
|
| 22352 |
+
"loss": 1.836,
|
| 22353 |
+
"step": 286400
|
| 22354 |
+
},
|
| 22355 |
+
{
|
| 22356 |
+
"epoch": 0.046048482074581794,
|
| 22357 |
+
"grad_norm": 1.016453742980957,
|
| 22358 |
+
"learning_rate": 5.0209893246693895e-06,
|
| 22359 |
+
"loss": 1.8561,
|
| 22360 |
+
"step": 286500
|
| 22361 |
+
},
|
| 22362 |
+
{
|
| 22363 |
+
"epoch": 0.04632756378412472,
|
| 22364 |
+
"grad_norm": 1.0423955917358398,
|
| 22365 |
+
"learning_rate": 5.007688123699686e-06,
|
| 22366 |
+
"loss": 1.8488,
|
| 22367 |
+
"step": 286600
|
| 22368 |
+
},
|
| 22369 |
+
{
|
| 22370 |
+
"epoch": 0.04660664549366764,
|
| 22371 |
+
"grad_norm": 1.0047492980957031,
|
| 22372 |
+
"learning_rate": 4.994402603128751e-06,
|
| 22373 |
+
"loss": 1.8384,
|
| 22374 |
+
"step": 286700
|
| 22375 |
+
},
|
| 22376 |
+
{
|
| 22377 |
+
"epoch": 0.046885727203210555,
|
| 22378 |
+
"grad_norm": 1.0472545623779297,
|
| 22379 |
+
"learning_rate": 4.981132773376704e-06,
|
| 22380 |
+
"loss": 1.8359,
|
| 22381 |
+
"step": 286800
|
| 22382 |
+
},
|
| 22383 |
+
{
|
| 22384 |
+
"epoch": 0.04716480891275347,
|
| 22385 |
+
"grad_norm": 1.0129274129867554,
|
| 22386 |
+
"learning_rate": 4.967878644851351e-06,
|
| 22387 |
+
"loss": 1.8428,
|
| 22388 |
+
"step": 286900
|
| 22389 |
+
},
|
| 22390 |
+
{
|
| 22391 |
+
"epoch": 0.0474438906222964,
|
| 22392 |
+
"grad_norm": 1.0653069019317627,
|
| 22393 |
+
"learning_rate": 4.954640227948188e-06,
|
| 22394 |
+
"loss": 1.8516,
|
| 22395 |
+
"step": 287000
|
| 22396 |
+
},
|
| 22397 |
+
{
|
| 22398 |
+
"epoch": 0.0474438906222964,
|
| 22399 |
+
"eval_loss": 2.207484483718872,
|
| 22400 |
+
"eval_runtime": 51.9821,
|
| 22401 |
+
"eval_samples_per_second": 196.106,
|
| 22402 |
+
"eval_steps_per_second": 1.539,
|
| 22403 |
+
"step": 287000
|
| 22404 |
+
},
|
| 22405 |
+
{
|
| 22406 |
+
"epoch": 0.047722972331839315,
|
| 22407 |
+
"grad_norm": 1.0424509048461914,
|
| 22408 |
+
"learning_rate": 4.941417533050394e-06,
|
| 22409 |
+
"loss": 1.8518,
|
| 22410 |
+
"step": 287100
|
| 22411 |
+
},
|
| 22412 |
+
{
|
| 22413 |
+
"epoch": 0.04800205404138223,
|
| 22414 |
+
"grad_norm": 1.0581769943237305,
|
| 22415 |
+
"learning_rate": 4.9282105705288185e-06,
|
| 22416 |
+
"loss": 1.8239,
|
| 22417 |
+
"step": 287200
|
| 22418 |
+
},
|
| 22419 |
+
{
|
| 22420 |
+
"epoch": 0.04828113575092516,
|
| 22421 |
+
"grad_norm": 1.0191422700881958,
|
| 22422 |
+
"learning_rate": 4.9150193507419505e-06,
|
| 22423 |
+
"loss": 1.8555,
|
| 22424 |
+
"step": 287300
|
| 22425 |
+
},
|
| 22426 |
+
{
|
| 22427 |
+
"epoch": 0.048560217460468076,
|
| 22428 |
+
"grad_norm": 1.0733542442321777,
|
| 22429 |
+
"learning_rate": 4.901843884035953e-06,
|
| 22430 |
+
"loss": 1.8397,
|
| 22431 |
+
"step": 287400
|
| 22432 |
+
},
|
| 22433 |
+
{
|
| 22434 |
+
"epoch": 0.048839299170010994,
|
| 22435 |
+
"grad_norm": 1.0520180463790894,
|
| 22436 |
+
"learning_rate": 4.888684180744635e-06,
|
| 22437 |
+
"loss": 1.841,
|
| 22438 |
+
"step": 287500
|
| 22439 |
+
},
|
| 22440 |
+
{
|
| 22441 |
+
"epoch": 0.04911838087955392,
|
| 22442 |
+
"grad_norm": 1.047424077987671,
|
| 22443 |
+
"learning_rate": 4.8755402511894175e-06,
|
| 22444 |
+
"loss": 1.8359,
|
| 22445 |
+
"step": 287600
|
| 22446 |
+
},
|
| 22447 |
+
{
|
| 22448 |
+
"epoch": 0.04939746258909684,
|
| 22449 |
+
"grad_norm": 1.0499184131622314,
|
| 22450 |
+
"learning_rate": 4.862412105679384e-06,
|
| 22451 |
+
"loss": 1.8258,
|
| 22452 |
+
"step": 287700
|
| 22453 |
+
},
|
| 22454 |
+
{
|
| 22455 |
+
"epoch": 0.049676544298639755,
|
| 22456 |
+
"grad_norm": 1.0351871252059937,
|
| 22457 |
+
"learning_rate": 4.849299754511205e-06,
|
| 22458 |
+
"loss": 1.8348,
|
| 22459 |
+
"step": 287800
|
| 22460 |
+
},
|
| 22461 |
+
{
|
| 22462 |
+
"epoch": 0.04995562600818267,
|
| 22463 |
+
"grad_norm": 1.0380860567092896,
|
| 22464 |
+
"learning_rate": 4.836203207969183e-06,
|
| 22465 |
+
"loss": 1.8306,
|
| 22466 |
+
"step": 287900
|
| 22467 |
+
},
|
| 22468 |
+
{
|
| 22469 |
+
"epoch": 0.0502347077177256,
|
| 22470 |
+
"grad_norm": 1.0105301141738892,
|
| 22471 |
+
"learning_rate": 4.823122476325231e-06,
|
| 22472 |
+
"loss": 1.8432,
|
| 22473 |
+
"step": 288000
|
| 22474 |
+
},
|
| 22475 |
+
{
|
| 22476 |
+
"epoch": 0.0502347077177256,
|
| 22477 |
+
"eval_loss": 2.199079751968384,
|
| 22478 |
+
"eval_runtime": 52.0806,
|
| 22479 |
+
"eval_samples_per_second": 195.735,
|
| 22480 |
+
"eval_steps_per_second": 1.536,
|
| 22481 |
+
"step": 288000
|
| 22482 |
+
},
|
| 22483 |
+
{
|
| 22484 |
+
"epoch": 0.00027908170954291995,
|
| 22485 |
+
"grad_norm": 1.004279375076294,
|
| 22486 |
+
"learning_rate": 4.8100575698388324e-06,
|
| 22487 |
+
"loss": 1.8457,
|
| 22488 |
+
"step": 288100
|
| 22489 |
+
},
|
| 22490 |
+
{
|
| 22491 |
+
"epoch": 0.0005581634190858399,
|
| 22492 |
+
"grad_norm": 1.001739740371704,
|
| 22493 |
+
"learning_rate": 4.79700849875708e-06,
|
| 22494 |
+
"loss": 1.8321,
|
| 22495 |
+
"step": 288200
|
| 22496 |
+
},
|
| 22497 |
+
{
|
| 22498 |
+
"epoch": 0.0008372451286287599,
|
| 22499 |
+
"grad_norm": 1.0556291341781616,
|
| 22500 |
+
"learning_rate": 4.7839752733146395e-06,
|
| 22501 |
+
"loss": 1.8446,
|
| 22502 |
+
"step": 288300
|
| 22503 |
+
},
|
| 22504 |
+
{
|
| 22505 |
+
"epoch": 0.0011163268381716798,
|
| 22506 |
+
"grad_norm": 1.014046311378479,
|
| 22507 |
+
"learning_rate": 4.7709579037337525e-06,
|
| 22508 |
+
"loss": 1.8194,
|
| 22509 |
+
"step": 288400
|
| 22510 |
+
},
|
| 22511 |
+
{
|
| 22512 |
+
"epoch": 0.0013954085477146,
|
| 22513 |
+
"grad_norm": 1.0474220514297485,
|
| 22514 |
+
"learning_rate": 4.757956400224214e-06,
|
| 22515 |
+
"loss": 1.8424,
|
| 22516 |
+
"step": 288500
|
| 22517 |
+
},
|
| 22518 |
+
{
|
| 22519 |
+
"epoch": 0.0016744902572575198,
|
| 22520 |
+
"grad_norm": 1.0006691217422485,
|
| 22521 |
+
"learning_rate": 4.744970772983387e-06,
|
| 22522 |
+
"loss": 1.83,
|
| 22523 |
+
"step": 288600
|
| 22524 |
+
},
|
| 22525 |
+
{
|
| 22526 |
+
"epoch": 0.00195357196680044,
|
| 22527 |
+
"grad_norm": 1.0182647705078125,
|
| 22528 |
+
"learning_rate": 4.732001032196173e-06,
|
| 22529 |
+
"loss": 1.8357,
|
| 22530 |
+
"step": 288700
|
| 22531 |
+
},
|
| 22532 |
+
{
|
| 22533 |
+
"epoch": 0.0022326536763433596,
|
| 22534 |
+
"grad_norm": 1.0402028560638428,
|
| 22535 |
+
"learning_rate": 4.719047188035028e-06,
|
| 22536 |
+
"loss": 1.8395,
|
| 22537 |
+
"step": 288800
|
| 22538 |
+
},
|
| 22539 |
+
{
|
| 22540 |
+
"epoch": 0.0025117353858862797,
|
| 22541 |
+
"grad_norm": 0.9996068477630615,
|
| 22542 |
+
"learning_rate": 4.706109250659915e-06,
|
| 22543 |
+
"loss": 1.8601,
|
| 22544 |
+
"step": 288900
|
| 22545 |
+
},
|
| 22546 |
+
{
|
| 22547 |
+
"epoch": 0.0027908170954292,
|
| 22548 |
+
"grad_norm": 1.0102015733718872,
|
| 22549 |
+
"learning_rate": 4.693187230218351e-06,
|
| 22550 |
+
"loss": 1.8282,
|
| 22551 |
+
"step": 289000
|
| 22552 |
+
},
|
| 22553 |
+
{
|
| 22554 |
+
"epoch": 0.0027908170954292,
|
| 22555 |
+
"eval_loss": 2.2036867141723633,
|
| 22556 |
+
"eval_runtime": 51.9029,
|
| 22557 |
+
"eval_samples_per_second": 196.405,
|
| 22558 |
+
"eval_steps_per_second": 1.541,
|
| 22559 |
+
"step": 289000
|
| 22560 |
+
},
|
| 22561 |
+
{
|
| 22562 |
+
"epoch": 0.00306989880497212,
|
| 22563 |
+
"grad_norm": 1.0433801412582397,
|
| 22564 |
+
"learning_rate": 4.680281136845338e-06,
|
| 22565 |
+
"loss": 1.843,
|
| 22566 |
+
"step": 289100
|
| 22567 |
+
},
|
| 22568 |
+
{
|
| 22569 |
+
"epoch": 0.0033489805145150396,
|
| 22570 |
+
"grad_norm": 1.04608952999115,
|
| 22571 |
+
"learning_rate": 4.667390980663416e-06,
|
| 22572 |
+
"loss": 1.8499,
|
| 22573 |
+
"step": 289200
|
| 22574 |
+
},
|
| 22575 |
+
{
|
| 22576 |
+
"epoch": 0.0036280622240579597,
|
| 22577 |
+
"grad_norm": 1.0211896896362305,
|
| 22578 |
+
"learning_rate": 4.654516771782597e-06,
|
| 22579 |
+
"loss": 1.8431,
|
| 22580 |
+
"step": 289300
|
| 22581 |
+
},
|
| 22582 |
+
{
|
| 22583 |
+
"epoch": 0.00390714393360088,
|
| 22584 |
+
"grad_norm": 1.0457395315170288,
|
| 22585 |
+
"learning_rate": 4.641658520300407e-06,
|
| 22586 |
+
"loss": 1.8281,
|
| 22587 |
+
"step": 289400
|
| 22588 |
+
},
|
| 22589 |
+
{
|
| 22590 |
+
"epoch": 0.0041862256431437995,
|
| 22591 |
+
"grad_norm": 1.0063202381134033,
|
| 22592 |
+
"learning_rate": 4.6288162363018475e-06,
|
| 22593 |
+
"loss": 1.8336,
|
| 22594 |
+
"step": 289500
|
| 22595 |
+
},
|
| 22596 |
+
{
|
| 22597 |
+
"epoch": 0.004465307352686719,
|
| 22598 |
+
"grad_norm": 1.003279209136963,
|
| 22599 |
+
"learning_rate": 4.615989929859402e-06,
|
| 22600 |
+
"loss": 1.8408,
|
| 22601 |
+
"step": 289600
|
| 22602 |
+
},
|
| 22603 |
+
{
|
| 22604 |
+
"epoch": 0.00474438906222964,
|
| 22605 |
+
"grad_norm": 1.0246212482452393,
|
| 22606 |
+
"learning_rate": 4.603179611033006e-06,
|
| 22607 |
+
"loss": 1.8343,
|
| 22608 |
+
"step": 289700
|
| 22609 |
+
},
|
| 22610 |
+
{
|
| 22611 |
+
"epoch": 0.005023470771772559,
|
| 22612 |
+
"grad_norm": 1.0443739891052246,
|
| 22613 |
+
"learning_rate": 4.590385289870075e-06,
|
| 22614 |
+
"loss": 1.8401,
|
| 22615 |
+
"step": 289800
|
| 22616 |
+
},
|
| 22617 |
+
{
|
| 22618 |
+
"epoch": 0.00530255248131548,
|
| 22619 |
+
"grad_norm": 1.0705519914627075,
|
| 22620 |
+
"learning_rate": 4.577606976405466e-06,
|
| 22621 |
+
"loss": 1.8389,
|
| 22622 |
+
"step": 289900
|
| 22623 |
+
},
|
| 22624 |
+
{
|
| 22625 |
+
"epoch": 0.0055816341908584,
|
| 22626 |
+
"grad_norm": 1.0605077743530273,
|
| 22627 |
+
"learning_rate": 4.564844680661487e-06,
|
| 22628 |
+
"loss": 1.8219,
|
| 22629 |
+
"step": 290000
|
| 22630 |
+
},
|
| 22631 |
+
{
|
| 22632 |
+
"epoch": 0.0055816341908584,
|
| 22633 |
+
"eval_loss": 2.2071588039398193,
|
| 22634 |
+
"eval_runtime": 51.4776,
|
| 22635 |
+
"eval_samples_per_second": 198.028,
|
| 22636 |
+
"eval_steps_per_second": 1.554,
|
| 22637 |
+
"step": 290000
|
| 22638 |
+
},
|
| 22639 |
+
{
|
| 22640 |
+
"epoch": 0.005860715900401319,
|
| 22641 |
+
"grad_norm": 1.0212249755859375,
|
| 22642 |
+
"learning_rate": 4.552098412647887e-06,
|
| 22643 |
+
"loss": 1.8286,
|
| 22644 |
+
"step": 290100
|
| 22645 |
+
},
|
| 22646 |
+
{
|
| 22647 |
+
"epoch": 0.00613979760994424,
|
| 22648 |
+
"grad_norm": 1.0316197872161865,
|
| 22649 |
+
"learning_rate": 4.539368182361822e-06,
|
| 22650 |
+
"loss": 1.831,
|
| 22651 |
+
"step": 290200
|
| 22652 |
+
},
|
| 22653 |
+
{
|
| 22654 |
+
"epoch": 0.0064188793194871595,
|
| 22655 |
+
"grad_norm": 1.059012532234192,
|
| 22656 |
+
"learning_rate": 4.526653999787897e-06,
|
| 22657 |
+
"loss": 1.8454,
|
| 22658 |
+
"step": 290300
|
| 22659 |
+
},
|
| 22660 |
+
{
|
| 22661 |
+
"epoch": 0.006697961029030079,
|
| 22662 |
+
"grad_norm": 1.0601192712783813,
|
| 22663 |
+
"learning_rate": 4.51395587489811e-06,
|
| 22664 |
+
"loss": 1.8344,
|
| 22665 |
+
"step": 290400
|
| 22666 |
+
},
|
| 22667 |
+
{
|
| 22668 |
+
"epoch": 0.006977042738573,
|
| 22669 |
+
"grad_norm": 1.028264045715332,
|
| 22670 |
+
"learning_rate": 4.50127381765188e-06,
|
| 22671 |
+
"loss": 1.8418,
|
| 22672 |
+
"step": 290500
|
| 22673 |
+
},
|
| 22674 |
+
{
|
| 22675 |
+
"epoch": 0.0072561244481159195,
|
| 22676 |
+
"grad_norm": 1.0473825931549072,
|
| 22677 |
+
"learning_rate": 4.488607837996006e-06,
|
| 22678 |
+
"loss": 1.8273,
|
| 22679 |
+
"step": 290600
|
| 22680 |
+
},
|
| 22681 |
+
{
|
| 22682 |
+
"epoch": 0.007535206157658839,
|
| 22683 |
+
"grad_norm": 1.0223557949066162,
|
| 22684 |
+
"learning_rate": 4.475957945864692e-06,
|
| 22685 |
+
"loss": 1.8318,
|
| 22686 |
+
"step": 290700
|
| 22687 |
+
},
|
| 22688 |
+
{
|
| 22689 |
+
"epoch": 0.00781428786720176,
|
| 22690 |
+
"grad_norm": 1.026455283164978,
|
| 22691 |
+
"learning_rate": 4.463324151179521e-06,
|
| 22692 |
+
"loss": 1.8252,
|
| 22693 |
+
"step": 290800
|
| 22694 |
+
},
|
| 22695 |
+
{
|
| 22696 |
+
"epoch": 0.00809336957674468,
|
| 22697 |
+
"grad_norm": 1.0249643325805664,
|
| 22698 |
+
"learning_rate": 4.450706463849458e-06,
|
| 22699 |
+
"loss": 1.8384,
|
| 22700 |
+
"step": 290900
|
| 22701 |
+
},
|
| 22702 |
+
{
|
| 22703 |
+
"epoch": 0.008372451286287599,
|
| 22704 |
+
"grad_norm": 1.0897847414016724,
|
| 22705 |
+
"learning_rate": 4.438104893770806e-06,
|
| 22706 |
+
"loss": 1.8316,
|
| 22707 |
+
"step": 291000
|
| 22708 |
+
},
|
| 22709 |
+
{
|
| 22710 |
+
"epoch": 0.008372451286287599,
|
| 22711 |
+
"eval_loss": 2.2129878997802734,
|
| 22712 |
+
"eval_runtime": 51.4364,
|
| 22713 |
+
"eval_samples_per_second": 198.186,
|
| 22714 |
+
"eval_steps_per_second": 1.555,
|
| 22715 |
+
"step": 291000
|
| 22716 |
+
},
|
| 22717 |
+
{
|
| 22718 |
+
"epoch": 0.008651532995830519,
|
| 22719 |
+
"grad_norm": 1.0365105867385864,
|
| 22720 |
+
"learning_rate": 4.425519450827259e-06,
|
| 22721 |
+
"loss": 1.8085,
|
| 22722 |
+
"step": 291100
|
| 22723 |
+
},
|
| 22724 |
+
{
|
| 22725 |
+
"epoch": 0.008930614705373438,
|
| 22726 |
+
"grad_norm": 1.0275731086730957,
|
| 22727 |
+
"learning_rate": 4.412950144889849e-06,
|
| 22728 |
+
"loss": 1.8278,
|
| 22729 |
+
"step": 291200
|
| 22730 |
+
},
|
| 22731 |
+
{
|
| 22732 |
+
"epoch": 0.00920969641491636,
|
| 22733 |
+
"grad_norm": 1.022194504737854,
|
| 22734 |
+
"learning_rate": 4.400396985816957e-06,
|
| 22735 |
+
"loss": 1.8147,
|
| 22736 |
+
"step": 291300
|
| 22737 |
+
},
|
| 22738 |
+
{
|
| 22739 |
+
"epoch": 0.00948877812445928,
|
| 22740 |
+
"grad_norm": 1.0360770225524902,
|
| 22741 |
+
"learning_rate": 4.387859983454279e-06,
|
| 22742 |
+
"loss": 1.835,
|
| 22743 |
+
"step": 291400
|
| 22744 |
+
},
|
| 22745 |
+
{
|
| 22746 |
+
"epoch": 0.0097678598340022,
|
| 22747 |
+
"grad_norm": 1.0490261316299438,
|
| 22748 |
+
"learning_rate": 4.375339147634866e-06,
|
| 22749 |
+
"loss": 1.8309,
|
| 22750 |
+
"step": 291500
|
| 22751 |
+
},
|
| 22752 |
+
{
|
| 22753 |
+
"epoch": 0.010046941543545119,
|
| 22754 |
+
"grad_norm": 1.0081874132156372,
|
| 22755 |
+
"learning_rate": 4.362834488179085e-06,
|
| 22756 |
+
"loss": 1.8247,
|
| 22757 |
+
"step": 291600
|
| 22758 |
+
},
|
| 22759 |
+
{
|
| 22760 |
+
"epoch": 0.010326023253088039,
|
| 22761 |
+
"grad_norm": 1.0340025424957275,
|
| 22762 |
+
"learning_rate": 4.350346014894596e-06,
|
| 22763 |
+
"loss": 1.8288,
|
| 22764 |
+
"step": 291700
|
| 22765 |
+
},
|
| 22766 |
+
{
|
| 22767 |
+
"epoch": 0.01060510496263096,
|
| 22768 |
+
"grad_norm": 1.080769419670105,
|
| 22769 |
+
"learning_rate": 4.337873737576376e-06,
|
| 22770 |
+
"loss": 1.8186,
|
| 22771 |
+
"step": 291800
|
| 22772 |
+
},
|
| 22773 |
+
{
|
| 22774 |
+
"epoch": 0.01088418667217388,
|
| 22775 |
+
"grad_norm": 1.027443528175354,
|
| 22776 |
+
"learning_rate": 4.3254176660067005e-06,
|
| 22777 |
+
"loss": 1.8374,
|
| 22778 |
+
"step": 291900
|
| 22779 |
+
},
|
| 22780 |
+
{
|
| 22781 |
+
"epoch": 0.0111632683817168,
|
| 22782 |
+
"grad_norm": 0.9847263097763062,
|
| 22783 |
+
"learning_rate": 4.3129778099551376e-06,
|
| 22784 |
+
"loss": 1.8312,
|
| 22785 |
+
"step": 292000
|
| 22786 |
+
},
|
| 22787 |
+
{
|
| 22788 |
+
"epoch": 0.0111632683817168,
|
| 22789 |
+
"eval_loss": 2.2189362049102783,
|
| 22790 |
+
"eval_runtime": 51.4867,
|
| 22791 |
+
"eval_samples_per_second": 197.993,
|
| 22792 |
+
"eval_steps_per_second": 1.554,
|
| 22793 |
+
"step": 292000
|
| 22794 |
+
},
|
| 22795 |
+
{
|
| 22796 |
+
"epoch": 0.011442350091259719,
|
| 22797 |
+
"grad_norm": 1.0336400270462036,
|
| 22798 |
+
"learning_rate": 4.30055417917854e-06,
|
| 22799 |
+
"loss": 1.8155,
|
| 22800 |
+
"step": 292100
|
| 22801 |
+
},
|
| 22802 |
+
{
|
| 22803 |
+
"epoch": 0.011721431800802639,
|
| 22804 |
+
"grad_norm": 1.011435627937317,
|
| 22805 |
+
"learning_rate": 4.288146783421012e-06,
|
| 22806 |
+
"loss": 1.8494,
|
| 22807 |
+
"step": 292200
|
| 22808 |
+
},
|
| 22809 |
+
{
|
| 22810 |
+
"epoch": 0.012000513510345558,
|
| 22811 |
+
"grad_norm": 1.0581125020980835,
|
| 22812 |
+
"learning_rate": 4.275755632413947e-06,
|
| 22813 |
+
"loss": 1.8194,
|
| 22814 |
+
"step": 292300
|
| 22815 |
+
},
|
| 22816 |
+
{
|
| 22817 |
+
"epoch": 0.01227959521988848,
|
| 22818 |
+
"grad_norm": 1.0441781282424927,
|
| 22819 |
+
"learning_rate": 4.263380735875991e-06,
|
| 22820 |
+
"loss": 1.8043,
|
| 22821 |
+
"step": 292400
|
| 22822 |
+
},
|
| 22823 |
+
{
|
| 22824 |
+
"epoch": 0.0125586769294314,
|
| 22825 |
+
"grad_norm": 0.9977090358734131,
|
| 22826 |
+
"learning_rate": 4.251022103513047e-06,
|
| 22827 |
+
"loss": 1.8424,
|
| 22828 |
+
"step": 292500
|
| 22829 |
+
},
|
| 22830 |
+
{
|
| 22831 |
+
"epoch": 0.012837758638974319,
|
| 22832 |
+
"grad_norm": 1.0513739585876465,
|
| 22833 |
+
"learning_rate": 4.238679745018243e-06,
|
| 22834 |
+
"loss": 1.8396,
|
| 22835 |
+
"step": 292600
|
| 22836 |
+
},
|
| 22837 |
+
{
|
| 22838 |
+
"epoch": 0.013116840348517239,
|
| 22839 |
+
"grad_norm": 1.013461947441101,
|
| 22840 |
+
"learning_rate": 4.226353670071961e-06,
|
| 22841 |
+
"loss": 1.8254,
|
| 22842 |
+
"step": 292700
|
| 22843 |
+
},
|
| 22844 |
+
{
|
| 22845 |
+
"epoch": 0.013395922058060158,
|
| 22846 |
+
"grad_norm": 1.0267400741577148,
|
| 22847 |
+
"learning_rate": 4.214043888341812e-06,
|
| 22848 |
+
"loss": 1.8194,
|
| 22849 |
+
"step": 292800
|
| 22850 |
+
},
|
| 22851 |
+
{
|
| 22852 |
+
"epoch": 0.013675003767603078,
|
| 22853 |
+
"grad_norm": 1.0605510473251343,
|
| 22854 |
+
"learning_rate": 4.201750409482607e-06,
|
| 22855 |
+
"loss": 1.83,
|
| 22856 |
+
"step": 292900
|
| 22857 |
+
},
|
| 22858 |
+
{
|
| 22859 |
+
"epoch": 0.013954085477146,
|
| 22860 |
+
"grad_norm": 1.0429390668869019,
|
| 22861 |
+
"learning_rate": 4.189473243136402e-06,
|
| 22862 |
+
"loss": 1.8305,
|
| 22863 |
+
"step": 293000
|
| 22864 |
+
},
|
| 22865 |
+
{
|
| 22866 |
+
"epoch": 0.013954085477146,
|
| 22867 |
+
"eval_loss": 2.212700843811035,
|
| 22868 |
+
"eval_runtime": 51.5608,
|
| 22869 |
+
"eval_samples_per_second": 197.708,
|
| 22870 |
+
"eval_steps_per_second": 1.552,
|
| 22871 |
+
"step": 293000
|
| 22872 |
+
},
|
| 22873 |
+
{
|
| 22874 |
+
"epoch": 0.01423316718668892,
|
| 22875 |
+
"grad_norm": 0.9996118545532227,
|
| 22876 |
+
"learning_rate": 4.177212398932428e-06,
|
| 22877 |
+
"loss": 1.8341,
|
| 22878 |
+
"step": 293100
|
| 22879 |
+
},
|
| 22880 |
+
{
|
| 22881 |
+
"epoch": 0.014512248896231839,
|
| 22882 |
+
"grad_norm": 1.0341185331344604,
|
| 22883 |
+
"learning_rate": 4.164967886487131e-06,
|
| 22884 |
+
"loss": 1.8232,
|
| 22885 |
+
"step": 293200
|
| 22886 |
+
},
|
| 22887 |
+
{
|
| 22888 |
+
"epoch": 0.014791330605774759,
|
| 22889 |
+
"grad_norm": 1.0189030170440674,
|
| 22890 |
+
"learning_rate": 4.15273971540415e-06,
|
| 22891 |
+
"loss": 1.8226,
|
| 22892 |
+
"step": 293300
|
| 22893 |
+
},
|
| 22894 |
+
{
|
| 22895 |
+
"epoch": 0.015070412315317678,
|
| 22896 |
+
"grad_norm": 1.0681477785110474,
|
| 22897 |
+
"learning_rate": 4.140527895274301e-06,
|
| 22898 |
+
"loss": 1.8146,
|
| 22899 |
+
"step": 293400
|
| 22900 |
+
},
|
| 22901 |
+
{
|
| 22902 |
+
"epoch": 0.015349494024860598,
|
| 22903 |
+
"grad_norm": 1.066925048828125,
|
| 22904 |
+
"learning_rate": 4.128332435675569e-06,
|
| 22905 |
+
"loss": 1.8229,
|
| 22906 |
+
"step": 293500
|
| 22907 |
+
},
|
| 22908 |
+
{
|
| 22909 |
+
"epoch": 0.01562857573440352,
|
| 22910 |
+
"grad_norm": 1.0204412937164307,
|
| 22911 |
+
"learning_rate": 4.116153346173121e-06,
|
| 22912 |
+
"loss": 1.8244,
|
| 22913 |
+
"step": 293600
|
| 22914 |
+
},
|
| 22915 |
+
{
|
| 22916 |
+
"epoch": 0.015907657443946437,
|
| 22917 |
+
"grad_norm": 1.0246905088424683,
|
| 22918 |
+
"learning_rate": 4.103990636319274e-06,
|
| 22919 |
+
"loss": 1.8073,
|
| 22920 |
+
"step": 293700
|
| 22921 |
+
},
|
| 22922 |
+
{
|
| 22923 |
+
"epoch": 0.01618673915348936,
|
| 22924 |
+
"grad_norm": 1.0570878982543945,
|
| 22925 |
+
"learning_rate": 4.091844315653512e-06,
|
| 22926 |
+
"loss": 1.8125,
|
| 22927 |
+
"step": 293800
|
| 22928 |
+
},
|
| 22929 |
+
{
|
| 22930 |
+
"epoch": 0.01646582086303228,
|
| 22931 |
+
"grad_norm": 1.0208039283752441,
|
| 22932 |
+
"learning_rate": 4.079714393702441e-06,
|
| 22933 |
+
"loss": 1.8197,
|
| 22934 |
+
"step": 293900
|
| 22935 |
+
},
|
| 22936 |
+
{
|
| 22937 |
+
"epoch": 0.016744902572575198,
|
| 22938 |
+
"grad_norm": 1.0461581945419312,
|
| 22939 |
+
"learning_rate": 4.067600879979824e-06,
|
| 22940 |
+
"loss": 1.8177,
|
| 22941 |
+
"step": 294000
|
| 22942 |
+
},
|
| 22943 |
+
{
|
| 22944 |
+
"epoch": 0.016744902572575198,
|
| 22945 |
+
"eval_loss": 2.225311756134033,
|
| 22946 |
+
"eval_runtime": 51.5825,
|
| 22947 |
+
"eval_samples_per_second": 197.625,
|
| 22948 |
+
"eval_steps_per_second": 1.551,
|
| 22949 |
+
"step": 294000
|
| 22950 |
+
},
|
| 22951 |
+
{
|
| 22952 |
+
"epoch": 0.01702398428211812,
|
| 22953 |
+
"grad_norm": 1.0400618314743042,
|
| 22954 |
+
"learning_rate": 4.055503783986556e-06,
|
| 22955 |
+
"loss": 1.8126,
|
| 22956 |
+
"step": 294100
|
| 22957 |
+
},
|
| 22958 |
+
{
|
| 22959 |
+
"epoch": 0.017303065991661037,
|
| 22960 |
+
"grad_norm": 1.0598971843719482,
|
| 22961 |
+
"learning_rate": 4.043423115210637e-06,
|
| 22962 |
+
"loss": 1.826,
|
| 22963 |
+
"step": 294200
|
| 22964 |
+
},
|
| 22965 |
+
{
|
| 22966 |
+
"epoch": 0.01758214770120396,
|
| 22967 |
+
"grad_norm": 0.9947335124015808,
|
| 22968 |
+
"learning_rate": 4.031358883127207e-06,
|
| 22969 |
+
"loss": 1.8312,
|
| 22970 |
+
"step": 294300
|
| 22971 |
+
},
|
| 22972 |
+
{
|
| 22973 |
+
"epoch": 0.017861229410746877,
|
| 22974 |
+
"grad_norm": 1.0881414413452148,
|
| 22975 |
+
"learning_rate": 4.019311097198489e-06,
|
| 22976 |
+
"loss": 1.8321,
|
| 22977 |
+
"step": 294400
|
| 22978 |
+
},
|
| 22979 |
+
{
|
| 22980 |
+
"epoch": 0.018140311120289798,
|
| 22981 |
+
"grad_norm": 1.0416432619094849,
|
| 22982 |
+
"learning_rate": 4.007279766873828e-06,
|
| 22983 |
+
"loss": 1.8171,
|
| 22984 |
+
"step": 294500
|
| 22985 |
+
},
|
| 22986 |
+
{
|
| 22987 |
+
"epoch": 0.01841939282983272,
|
| 22988 |
+
"grad_norm": 1.0456783771514893,
|
| 22989 |
+
"learning_rate": 3.9952649015896545e-06,
|
| 22990 |
+
"loss": 1.8077,
|
| 22991 |
+
"step": 294600
|
| 22992 |
+
},
|
| 22993 |
+
{
|
| 22994 |
+
"epoch": 0.018698474539375638,
|
| 22995 |
+
"grad_norm": 1.0717263221740723,
|
| 22996 |
+
"learning_rate": 3.983266510769479e-06,
|
| 22997 |
+
"loss": 1.8269,
|
| 22998 |
+
"step": 294700
|
| 22999 |
+
},
|
| 23000 |
+
{
|
| 23001 |
+
"epoch": 0.01897755624891856,
|
| 23002 |
+
"grad_norm": 1.0348212718963623,
|
| 23003 |
+
"learning_rate": 3.971284603823899e-06,
|
| 23004 |
+
"loss": 1.839,
|
| 23005 |
+
"step": 294800
|
| 23006 |
+
},
|
| 23007 |
+
{
|
| 23008 |
+
"epoch": 0.019256637958461477,
|
| 23009 |
+
"grad_norm": 1.017639398574829,
|
| 23010 |
+
"learning_rate": 3.9593191901505846e-06,
|
| 23011 |
+
"loss": 1.8076,
|
| 23012 |
+
"step": 294900
|
| 23013 |
+
},
|
| 23014 |
+
{
|
| 23015 |
+
"epoch": 0.0195357196680044,
|
| 23016 |
+
"grad_norm": 1.0568976402282715,
|
| 23017 |
+
"learning_rate": 3.947370279134269e-06,
|
| 23018 |
+
"loss": 1.8317,
|
| 23019 |
+
"step": 295000
|
| 23020 |
+
},
|
| 23021 |
+
{
|
| 23022 |
+
"epoch": 0.0195357196680044,
|
| 23023 |
+
"eval_loss": 2.2106025218963623,
|
| 23024 |
+
"eval_runtime": 51.5707,
|
| 23025 |
+
"eval_samples_per_second": 197.67,
|
| 23026 |
+
"eval_steps_per_second": 1.551,
|
| 23027 |
+
"step": 295000
|
| 23028 |
+
},
|
| 23029 |
+
{
|
| 23030 |
+
"epoch": 0.01981480137754732,
|
| 23031 |
+
"grad_norm": 0.9975104928016663,
|
| 23032 |
+
"learning_rate": 3.935437880146728e-06,
|
| 23033 |
+
"loss": 1.8075,
|
| 23034 |
+
"step": 295100
|
| 23035 |
+
},
|
| 23036 |
+
{
|
| 23037 |
+
"epoch": 0.020093883087090238,
|
| 23038 |
+
"grad_norm": 1.0724748373031616,
|
| 23039 |
+
"learning_rate": 3.923522002546804e-06,
|
| 23040 |
+
"loss": 1.8101,
|
| 23041 |
+
"step": 295200
|
| 23042 |
+
},
|
| 23043 |
+
{
|
| 23044 |
+
"epoch": 0.02037296479663316,
|
| 23045 |
+
"grad_norm": 1.0251374244689941,
|
| 23046 |
+
"learning_rate": 3.911622655680375e-06,
|
| 23047 |
+
"loss": 1.8165,
|
| 23048 |
+
"step": 295300
|
| 23049 |
+
},
|
| 23050 |
+
{
|
| 23051 |
+
"epoch": 0.020652046506176077,
|
| 23052 |
+
"grad_norm": 0.9875963926315308,
|
| 23053 |
+
"learning_rate": 3.89973984888036e-06,
|
| 23054 |
+
"loss": 1.832,
|
| 23055 |
+
"step": 295400
|
| 23056 |
+
},
|
| 23057 |
+
{
|
| 23058 |
+
"epoch": 0.020931128215719,
|
| 23059 |
+
"grad_norm": 1.022261619567871,
|
| 23060 |
+
"learning_rate": 3.887873591466687e-06,
|
| 23061 |
+
"loss": 1.822,
|
| 23062 |
+
"step": 295500
|
| 23063 |
+
},
|
| 23064 |
+
{
|
| 23065 |
+
"epoch": 0.02121020992526192,
|
| 23066 |
+
"grad_norm": 1.035934329032898,
|
| 23067 |
+
"learning_rate": 3.8760238927463306e-06,
|
| 23068 |
+
"loss": 1.8143,
|
| 23069 |
+
"step": 295600
|
| 23070 |
+
},
|
| 23071 |
+
{
|
| 23072 |
+
"epoch": 0.021489291634804838,
|
| 23073 |
+
"grad_norm": 1.0614137649536133,
|
| 23074 |
+
"learning_rate": 3.864190762013248e-06,
|
| 23075 |
+
"loss": 1.8123,
|
| 23076 |
+
"step": 295700
|
| 23077 |
+
},
|
| 23078 |
+
{
|
| 23079 |
+
"epoch": 0.02176837334434776,
|
| 23080 |
+
"grad_norm": 1.0247828960418701,
|
| 23081 |
+
"learning_rate": 3.8523742085484235e-06,
|
| 23082 |
+
"loss": 1.8284,
|
| 23083 |
+
"step": 295800
|
| 23084 |
+
},
|
| 23085 |
+
{
|
| 23086 |
+
"epoch": 0.022047455053890677,
|
| 23087 |
+
"grad_norm": 1.0341575145721436,
|
| 23088 |
+
"learning_rate": 3.84057424161984e-06,
|
| 23089 |
+
"loss": 1.8288,
|
| 23090 |
+
"step": 295900
|
| 23091 |
+
},
|
| 23092 |
+
{
|
| 23093 |
+
"epoch": 0.0223265367634336,
|
| 23094 |
+
"grad_norm": 1.0558165311813354,
|
| 23095 |
+
"learning_rate": 3.8287908704824545e-06,
|
| 23096 |
+
"loss": 1.8145,
|
| 23097 |
+
"step": 296000
|
| 23098 |
+
},
|
| 23099 |
+
{
|
| 23100 |
+
"epoch": 0.0223265367634336,
|
| 23101 |
+
"eval_loss": 2.2238752841949463,
|
| 23102 |
+
"eval_runtime": 52.008,
|
| 23103 |
+
"eval_samples_per_second": 196.008,
|
| 23104 |
+
"eval_steps_per_second": 1.538,
|
| 23105 |
+
"step": 296000
|
| 23106 |
+
},
|
| 23107 |
+
{
|
| 23108 |
+
"epoch": 0.022605618472976517,
|
| 23109 |
+
"grad_norm": 1.0519689321517944,
|
| 23110 |
+
"learning_rate": 3.8170241043782225e-06,
|
| 23111 |
+
"loss": 1.8309,
|
| 23112 |
+
"step": 296100
|
| 23113 |
+
},
|
| 23114 |
+
{
|
| 23115 |
+
"epoch": 0.022884700182519438,
|
| 23116 |
+
"grad_norm": 1.0083707571029663,
|
| 23117 |
+
"learning_rate": 3.8052739525360674e-06,
|
| 23118 |
+
"loss": 1.8125,
|
| 23119 |
+
"step": 296200
|
| 23120 |
+
},
|
| 23121 |
+
{
|
| 23122 |
+
"epoch": 0.02316378189206236,
|
| 23123 |
+
"grad_norm": 1.028019905090332,
|
| 23124 |
+
"learning_rate": 3.793540424171896e-06,
|
| 23125 |
+
"loss": 1.819,
|
| 23126 |
+
"step": 296300
|
| 23127 |
+
},
|
| 23128 |
+
{
|
| 23129 |
+
"epoch": 0.023442863601605277,
|
| 23130 |
+
"grad_norm": 1.0253424644470215,
|
| 23131 |
+
"learning_rate": 3.781823528488554e-06,
|
| 23132 |
+
"loss": 1.7998,
|
| 23133 |
+
"step": 296400
|
| 23134 |
+
},
|
| 23135 |
+
{
|
| 23136 |
+
"epoch": 0.0237219453111482,
|
| 23137 |
+
"grad_norm": 1.0056533813476562,
|
| 23138 |
+
"learning_rate": 3.770123274675855e-06,
|
| 23139 |
+
"loss": 1.8169,
|
| 23140 |
+
"step": 296500
|
| 23141 |
+
},
|
| 23142 |
+
{
|
| 23143 |
+
"epoch": 0.024001027020691117,
|
| 23144 |
+
"grad_norm": 0.9970018863677979,
|
| 23145 |
+
"learning_rate": 3.758439671910563e-06,
|
| 23146 |
+
"loss": 1.8182,
|
| 23147 |
+
"step": 296600
|
| 23148 |
+
},
|
| 23149 |
+
{
|
| 23150 |
+
"epoch": 0.024280108730234038,
|
| 23151 |
+
"grad_norm": 1.1070934534072876,
|
| 23152 |
+
"learning_rate": 3.746772729356382e-06,
|
| 23153 |
+
"loss": 1.8255,
|
| 23154 |
+
"step": 296700
|
| 23155 |
+
},
|
| 23156 |
+
{
|
| 23157 |
+
"epoch": 0.02455919043977696,
|
| 23158 |
+
"grad_norm": 1.0187138319015503,
|
| 23159 |
+
"learning_rate": 3.735122456163936e-06,
|
| 23160 |
+
"loss": 1.8185,
|
| 23161 |
+
"step": 296800
|
| 23162 |
+
},
|
| 23163 |
+
{
|
| 23164 |
+
"epoch": 0.024838272149319877,
|
| 23165 |
+
"grad_norm": 1.029852271080017,
|
| 23166 |
+
"learning_rate": 3.723488861470792e-06,
|
| 23167 |
+
"loss": 1.8215,
|
| 23168 |
+
"step": 296900
|
| 23169 |
+
},
|
| 23170 |
+
{
|
| 23171 |
+
"epoch": 0.0251173538588628,
|
| 23172 |
+
"grad_norm": 1.0434601306915283,
|
| 23173 |
+
"learning_rate": 3.711871954401419e-06,
|
| 23174 |
+
"loss": 1.8068,
|
| 23175 |
+
"step": 297000
|
| 23176 |
+
},
|
| 23177 |
+
{
|
| 23178 |
+
"epoch": 0.0251173538588628,
|
| 23179 |
+
"eval_loss": 2.2075464725494385,
|
| 23180 |
+
"eval_runtime": 51.6992,
|
| 23181 |
+
"eval_samples_per_second": 197.179,
|
| 23182 |
+
"eval_steps_per_second": 1.547,
|
| 23183 |
+
"step": 297000
|
| 23184 |
+
},
|
| 23185 |
+
{
|
| 23186 |
+
"epoch": 0.025396435568405717,
|
| 23187 |
+
"grad_norm": 1.087276816368103,
|
| 23188 |
+
"learning_rate": 3.7002717440672184e-06,
|
| 23189 |
+
"loss": 1.8137,
|
| 23190 |
+
"step": 297100
|
| 23191 |
+
},
|
| 23192 |
+
{
|
| 23193 |
+
"epoch": 0.025675517277948638,
|
| 23194 |
+
"grad_norm": 1.039167046546936,
|
| 23195 |
+
"learning_rate": 3.688688239566471e-06,
|
| 23196 |
+
"loss": 1.802,
|
| 23197 |
+
"step": 297200
|
| 23198 |
+
},
|
| 23199 |
+
{
|
| 23200 |
+
"epoch": 0.025954598987491556,
|
| 23201 |
+
"grad_norm": 1.0578351020812988,
|
| 23202 |
+
"learning_rate": 3.6771214499843693e-06,
|
| 23203 |
+
"loss": 1.8276,
|
| 23204 |
+
"step": 297300
|
| 23205 |
+
},
|
| 23206 |
+
{
|
| 23207 |
+
"epoch": 0.026233680697034478,
|
| 23208 |
+
"grad_norm": 1.0496500730514526,
|
| 23209 |
+
"learning_rate": 3.6655713843930018e-06,
|
| 23210 |
+
"loss": 1.806,
|
| 23211 |
+
"step": 297400
|
| 23212 |
+
},
|
| 23213 |
+
{
|
| 23214 |
+
"epoch": 0.0265127624065774,
|
| 23215 |
+
"grad_norm": 1.0503089427947998,
|
| 23216 |
+
"learning_rate": 3.654038051851333e-06,
|
| 23217 |
+
"loss": 1.8041,
|
| 23218 |
+
"step": 297500
|
| 23219 |
+
},
|
| 23220 |
+
{
|
| 23221 |
+
"epoch": 0.026791844116120317,
|
| 23222 |
+
"grad_norm": 1.0246284008026123,
|
| 23223 |
+
"learning_rate": 3.6425214614051936e-06,
|
| 23224 |
+
"loss": 1.7952,
|
| 23225 |
+
"step": 297600
|
| 23226 |
+
},
|
| 23227 |
+
{
|
| 23228 |
+
"epoch": 0.02707092582566324,
|
| 23229 |
+
"grad_norm": 1.0332282781600952,
|
| 23230 |
+
"learning_rate": 3.631021622087297e-06,
|
| 23231 |
+
"loss": 1.8265,
|
| 23232 |
+
"step": 297700
|
| 23233 |
+
},
|
| 23234 |
+
{
|
| 23235 |
+
"epoch": 0.027350007535206156,
|
| 23236 |
+
"grad_norm": 1.0222516059875488,
|
| 23237 |
+
"learning_rate": 3.619538542917217e-06,
|
| 23238 |
+
"loss": 1.8215,
|
| 23239 |
+
"step": 297800
|
| 23240 |
+
},
|
| 23241 |
+
{
|
| 23242 |
+
"epoch": 0.027629089244749078,
|
| 23243 |
+
"grad_norm": 1.0545893907546997,
|
| 23244 |
+
"learning_rate": 3.608072232901377e-06,
|
| 23245 |
+
"loss": 1.8263,
|
| 23246 |
+
"step": 297900
|
| 23247 |
+
},
|
| 23248 |
+
{
|
| 23249 |
+
"epoch": 0.027908170954292,
|
| 23250 |
+
"grad_norm": 1.091201901435852,
|
| 23251 |
+
"learning_rate": 3.596622701033048e-06,
|
| 23252 |
+
"loss": 1.8228,
|
| 23253 |
+
"step": 298000
|
| 23254 |
+
},
|
| 23255 |
+
{
|
| 23256 |
+
"epoch": 0.027908170954292,
|
| 23257 |
+
"eval_loss": 2.2129366397857666,
|
| 23258 |
+
"eval_runtime": 51.7579,
|
| 23259 |
+
"eval_samples_per_second": 196.955,
|
| 23260 |
+
"eval_steps_per_second": 1.546,
|
| 23261 |
+
"step": 298000
|
| 23262 |
+
},
|
| 23263 |
+
{
|
| 23264 |
+
"epoch": 0.028187252663834917,
|
| 23265 |
+
"grad_norm": 1.0261002779006958,
|
| 23266 |
+
"learning_rate": 3.58518995629234e-06,
|
| 23267 |
+
"loss": 1.8203,
|
| 23268 |
+
"step": 298100
|
| 23269 |
+
},
|
| 23270 |
+
{
|
| 23271 |
+
"epoch": 0.02846633437337784,
|
| 23272 |
+
"grad_norm": 1.0479872226715088,
|
| 23273 |
+
"learning_rate": 3.5737740076462106e-06,
|
| 23274 |
+
"loss": 1.7966,
|
| 23275 |
+
"step": 298200
|
| 23276 |
+
},
|
| 23277 |
+
{
|
| 23278 |
+
"epoch": 0.028745416082920756,
|
| 23279 |
+
"grad_norm": 1.036954641342163,
|
| 23280 |
+
"learning_rate": 3.562374864048429e-06,
|
| 23281 |
+
"loss": 1.8111,
|
| 23282 |
+
"step": 298300
|
| 23283 |
+
},
|
| 23284 |
+
{
|
| 23285 |
+
"epoch": 0.029024497792463678,
|
| 23286 |
+
"grad_norm": 1.0831959247589111,
|
| 23287 |
+
"learning_rate": 3.550992534439576e-06,
|
| 23288 |
+
"loss": 1.7991,
|
| 23289 |
+
"step": 298400
|
| 23290 |
+
},
|
| 23291 |
+
{
|
| 23292 |
+
"epoch": 0.0293035795020066,
|
| 23293 |
+
"grad_norm": 1.0515846014022827,
|
| 23294 |
+
"learning_rate": 3.539627027747067e-06,
|
| 23295 |
+
"loss": 1.815,
|
| 23296 |
+
"step": 298500
|
| 23297 |
+
},
|
| 23298 |
+
{
|
| 23299 |
+
"epoch": 0.029582661211549517,
|
| 23300 |
+
"grad_norm": 1.0659505128860474,
|
| 23301 |
+
"learning_rate": 3.5282783528851117e-06,
|
| 23302 |
+
"loss": 1.8105,
|
| 23303 |
+
"step": 298600
|
| 23304 |
+
},
|
| 23305 |
+
{
|
| 23306 |
+
"epoch": 0.02986174292109244,
|
| 23307 |
+
"grad_norm": 1.0669214725494385,
|
| 23308 |
+
"learning_rate": 3.516946518754724e-06,
|
| 23309 |
+
"loss": 1.7961,
|
| 23310 |
+
"step": 298700
|
| 23311 |
+
},
|
| 23312 |
+
{
|
| 23313 |
+
"epoch": 0.030140824630635357,
|
| 23314 |
+
"grad_norm": 1.0422730445861816,
|
| 23315 |
+
"learning_rate": 3.5056315342436945e-06,
|
| 23316 |
+
"loss": 1.8219,
|
| 23317 |
+
"step": 298800
|
| 23318 |
+
},
|
| 23319 |
+
{
|
| 23320 |
+
"epoch": 0.030419906340178278,
|
| 23321 |
+
"grad_norm": 1.0351274013519287,
|
| 23322 |
+
"learning_rate": 3.4943334082266103e-06,
|
| 23323 |
+
"loss": 1.8183,
|
| 23324 |
+
"step": 298900
|
| 23325 |
+
},
|
| 23326 |
+
{
|
| 23327 |
+
"epoch": 0.030698988049721196,
|
| 23328 |
+
"grad_norm": 1.0675437450408936,
|
| 23329 |
+
"learning_rate": 3.483052149564839e-06,
|
| 23330 |
+
"loss": 1.8024,
|
| 23331 |
+
"step": 299000
|
| 23332 |
+
},
|
| 23333 |
+
{
|
| 23334 |
+
"epoch": 0.030698988049721196,
|
| 23335 |
+
"eval_loss": 2.2119719982147217,
|
| 23336 |
+
"eval_runtime": 51.8915,
|
| 23337 |
+
"eval_samples_per_second": 196.448,
|
| 23338 |
+
"eval_steps_per_second": 1.542,
|
| 23339 |
+
"step": 299000
|
| 23340 |
+
},
|
| 23341 |
+
{
|
| 23342 |
+
"epoch": 0.030978069759264117,
|
| 23343 |
+
"grad_norm": 1.033592939376831,
|
| 23344 |
+
"learning_rate": 3.4717877671065103e-06,
|
| 23345 |
+
"loss": 1.8264,
|
| 23346 |
+
"step": 299100
|
| 23347 |
+
},
|
| 23348 |
+
{
|
| 23349 |
+
"epoch": 0.03125715146880704,
|
| 23350 |
+
"grad_norm": 1.0776604413986206,
|
| 23351 |
+
"learning_rate": 3.460540269686524e-06,
|
| 23352 |
+
"loss": 1.7936,
|
| 23353 |
+
"step": 299200
|
| 23354 |
+
},
|
| 23355 |
+
{
|
| 23356 |
+
"epoch": 0.03153623317834996,
|
| 23357 |
+
"grad_norm": 1.0117859840393066,
|
| 23358 |
+
"learning_rate": 3.4493096661265267e-06,
|
| 23359 |
+
"loss": 1.7972,
|
| 23360 |
+
"step": 299300
|
| 23361 |
+
},
|
| 23362 |
+
{
|
| 23363 |
+
"epoch": 0.031815314887892875,
|
| 23364 |
+
"grad_norm": 1.0392084121704102,
|
| 23365 |
+
"learning_rate": 3.438095965234928e-06,
|
| 23366 |
+
"loss": 1.8174,
|
| 23367 |
+
"step": 299400
|
| 23368 |
+
},
|
| 23369 |
+
{
|
| 23370 |
+
"epoch": 0.0320943965974358,
|
| 23371 |
+
"grad_norm": 1.015053153038025,
|
| 23372 |
+
"learning_rate": 3.4268991758068745e-06,
|
| 23373 |
+
"loss": 1.8134,
|
| 23374 |
+
"step": 299500
|
| 23375 |
+
},
|
| 23376 |
+
{
|
| 23377 |
+
"epoch": 0.03237347830697872,
|
| 23378 |
+
"grad_norm": 1.012290596961975,
|
| 23379 |
+
"learning_rate": 3.415719306624246e-06,
|
| 23380 |
+
"loss": 1.8254,
|
| 23381 |
+
"step": 299600
|
| 23382 |
+
},
|
| 23383 |
+
{
|
| 23384 |
+
"epoch": 0.032652560016521635,
|
| 23385 |
+
"grad_norm": 1.0493707656860352,
|
| 23386 |
+
"learning_rate": 3.404556366455647e-06,
|
| 23387 |
+
"loss": 1.8037,
|
| 23388 |
+
"step": 299700
|
| 23389 |
+
},
|
| 23390 |
+
{
|
| 23391 |
+
"epoch": 0.03293164172606456,
|
| 23392 |
+
"grad_norm": 1.0958573818206787,
|
| 23393 |
+
"learning_rate": 3.3934103640564152e-06,
|
| 23394 |
+
"loss": 1.8072,
|
| 23395 |
+
"step": 299800
|
| 23396 |
+
},
|
| 23397 |
+
{
|
| 23398 |
+
"epoch": 0.03321072343560748,
|
| 23399 |
+
"grad_norm": 1.0864017009735107,
|
| 23400 |
+
"learning_rate": 3.382281308168603e-06,
|
| 23401 |
+
"loss": 1.8337,
|
| 23402 |
+
"step": 299900
|
| 23403 |
+
},
|
| 23404 |
+
{
|
| 23405 |
+
"epoch": 0.033489805145150396,
|
| 23406 |
+
"grad_norm": 1.0446292161941528,
|
| 23407 |
+
"learning_rate": 3.3711692075209687e-06,
|
| 23408 |
+
"loss": 1.8123,
|
| 23409 |
+
"step": 300000
|
| 23410 |
+
},
|
| 23411 |
+
{
|
| 23412 |
+
"epoch": 0.033489805145150396,
|
| 23413 |
+
"eval_loss": 2.2072536945343018,
|
| 23414 |
+
"eval_runtime": 51.8799,
|
| 23415 |
+
"eval_samples_per_second": 196.492,
|
| 23416 |
+
"eval_steps_per_second": 1.542,
|
| 23417 |
+
"step": 300000
|
| 23418 |
+
},
|
| 23419 |
+
{
|
| 23420 |
+
"epoch": 0.033768886854693314,
|
| 23421 |
+
"grad_norm": 1.0435408353805542,
|
| 23422 |
+
"learning_rate": 3.3600740708289615e-06,
|
| 23423 |
+
"loss": 1.7973,
|
| 23424 |
+
"step": 300100
|
| 23425 |
+
},
|
| 23426 |
+
{
|
| 23427 |
+
"epoch": 0.03404796856423624,
|
| 23428 |
+
"grad_norm": 1.0345299243927002,
|
| 23429 |
+
"learning_rate": 3.348995906794741e-06,
|
| 23430 |
+
"loss": 1.8213,
|
| 23431 |
+
"step": 300200
|
| 23432 |
+
},
|
| 23433 |
+
{
|
| 23434 |
+
"epoch": 0.03432705027377916,
|
| 23435 |
+
"grad_norm": 1.037927269935608,
|
| 23436 |
+
"learning_rate": 3.33793472410715e-06,
|
| 23437 |
+
"loss": 1.8048,
|
| 23438 |
+
"step": 300300
|
| 23439 |
+
},
|
| 23440 |
+
{
|
| 23441 |
+
"epoch": 0.034606131983322075,
|
| 23442 |
+
"grad_norm": 1.0403209924697876,
|
| 23443 |
+
"learning_rate": 3.326890531441712e-06,
|
| 23444 |
+
"loss": 1.8136,
|
| 23445 |
+
"step": 300400
|
| 23446 |
+
},
|
| 23447 |
+
{
|
| 23448 |
+
"epoch": 0.034885213692865,
|
| 23449 |
+
"grad_norm": 1.0413801670074463,
|
| 23450 |
+
"learning_rate": 3.31586333746062e-06,
|
| 23451 |
+
"loss": 1.7982,
|
| 23452 |
+
"step": 300500
|
| 23453 |
+
},
|
| 23454 |
+
{
|
| 23455 |
+
"epoch": 0.03516429540240792,
|
| 23456 |
+
"grad_norm": 1.0370949506759644,
|
| 23457 |
+
"learning_rate": 3.3048531508127366e-06,
|
| 23458 |
+
"loss": 1.7944,
|
| 23459 |
+
"step": 300600
|
| 23460 |
+
},
|
| 23461 |
+
{
|
| 23462 |
+
"epoch": 0.035443377111950836,
|
| 23463 |
+
"grad_norm": 1.0159741640090942,
|
| 23464 |
+
"learning_rate": 3.2938599801335928e-06,
|
| 23465 |
+
"loss": 1.8001,
|
| 23466 |
+
"step": 300700
|
| 23467 |
+
},
|
| 23468 |
+
{
|
| 23469 |
+
"epoch": 0.035722458821493754,
|
| 23470 |
+
"grad_norm": 1.0671415328979492,
|
| 23471 |
+
"learning_rate": 3.282883834045372e-06,
|
| 23472 |
+
"loss": 1.7925,
|
| 23473 |
+
"step": 300800
|
| 23474 |
+
},
|
| 23475 |
+
{
|
| 23476 |
+
"epoch": 0.03600154053103668,
|
| 23477 |
+
"grad_norm": 1.0309702157974243,
|
| 23478 |
+
"learning_rate": 3.2719247211568965e-06,
|
| 23479 |
+
"loss": 1.8119,
|
| 23480 |
+
"step": 300900
|
| 23481 |
+
},
|
| 23482 |
+
{
|
| 23483 |
+
"epoch": 0.036280622240579596,
|
| 23484 |
+
"grad_norm": 1.02182137966156,
|
| 23485 |
+
"learning_rate": 3.2609826500636238e-06,
|
| 23486 |
+
"loss": 1.8186,
|
| 23487 |
+
"step": 301000
|
| 23488 |
+
},
|
| 23489 |
+
{
|
| 23490 |
+
"epoch": 0.036280622240579596,
|
| 23491 |
+
"eval_loss": 2.212273597717285,
|
| 23492 |
+
"eval_runtime": 51.7058,
|
| 23493 |
+
"eval_samples_per_second": 197.154,
|
| 23494 |
+
"eval_steps_per_second": 1.547,
|
| 23495 |
+
"step": 301000
|
| 23496 |
+
},
|
| 23497 |
+
{
|
| 23498 |
+
"epoch": 0.036559703950122514,
|
| 23499 |
+
"grad_norm": 1.0467265844345093,
|
| 23500 |
+
"learning_rate": 3.2500576293476638e-06,
|
| 23501 |
+
"loss": 1.8002,
|
| 23502 |
+
"step": 301100
|
| 23503 |
+
},
|
| 23504 |
+
{
|
| 23505 |
+
"epoch": 0.03683878565966544,
|
| 23506 |
+
"grad_norm": 1.0487096309661865,
|
| 23507 |
+
"learning_rate": 3.2391496675777484e-06,
|
| 23508 |
+
"loss": 1.7995,
|
| 23509 |
+
"step": 301200
|
| 23510 |
+
},
|
| 23511 |
+
{
|
| 23512 |
+
"epoch": 0.03711786736920836,
|
| 23513 |
+
"grad_norm": 1.0497572422027588,
|
| 23514 |
+
"learning_rate": 3.2282587733092173e-06,
|
| 23515 |
+
"loss": 1.8021,
|
| 23516 |
+
"step": 301300
|
| 23517 |
+
},
|
| 23518 |
+
{
|
| 23519 |
+
"epoch": 0.037396949078751275,
|
| 23520 |
+
"grad_norm": 1.0201036930084229,
|
| 23521 |
+
"learning_rate": 3.217384955084035e-06,
|
| 23522 |
+
"loss": 1.809,
|
| 23523 |
+
"step": 301400
|
| 23524 |
+
},
|
| 23525 |
+
{
|
| 23526 |
+
"epoch": 0.0376760307882942,
|
| 23527 |
+
"grad_norm": 1.0497982501983643,
|
| 23528 |
+
"learning_rate": 3.2065282214307712e-06,
|
| 23529 |
+
"loss": 1.8115,
|
| 23530 |
+
"step": 301500
|
| 23531 |
+
},
|
| 23532 |
+
{
|
| 23533 |
+
"epoch": 0.03795511249783712,
|
| 23534 |
+
"grad_norm": 1.0479981899261475,
|
| 23535 |
+
"learning_rate": 3.1956885808646002e-06,
|
| 23536 |
+
"loss": 1.805,
|
| 23537 |
+
"step": 301600
|
| 23538 |
+
},
|
| 23539 |
+
{
|
| 23540 |
+
"epoch": 0.038234194207380036,
|
| 23541 |
+
"grad_norm": 1.0764997005462646,
|
| 23542 |
+
"learning_rate": 3.1848660418872744e-06,
|
| 23543 |
+
"loss": 1.8092,
|
| 23544 |
+
"step": 301700
|
| 23545 |
+
},
|
| 23546 |
+
{
|
| 23547 |
+
"epoch": 0.038513275916922954,
|
| 23548 |
+
"grad_norm": 1.046151876449585,
|
| 23549 |
+
"learning_rate": 3.174060612987148e-06,
|
| 23550 |
+
"loss": 1.8185,
|
| 23551 |
+
"step": 301800
|
| 23552 |
+
},
|
| 23553 |
+
{
|
| 23554 |
+
"epoch": 0.03879235762646588,
|
| 23555 |
+
"grad_norm": 1.0357836484909058,
|
| 23556 |
+
"learning_rate": 3.1632723026391503e-06,
|
| 23557 |
+
"loss": 1.8195,
|
| 23558 |
+
"step": 301900
|
| 23559 |
+
},
|
| 23560 |
+
{
|
| 23561 |
+
"epoch": 0.0390714393360088,
|
| 23562 |
+
"grad_norm": 1.089996099472046,
|
| 23563 |
+
"learning_rate": 3.1525011193047847e-06,
|
| 23564 |
+
"loss": 1.7961,
|
| 23565 |
+
"step": 302000
|
| 23566 |
+
},
|
| 23567 |
+
{
|
| 23568 |
+
"epoch": 0.0390714393360088,
|
| 23569 |
+
"eval_loss": 2.2081243991851807,
|
| 23570 |
+
"eval_runtime": 51.8009,
|
| 23571 |
+
"eval_samples_per_second": 196.792,
|
| 23572 |
+
"eval_steps_per_second": 1.544,
|
| 23573 |
+
"step": 302000
|
| 23574 |
+
},
|
| 23575 |
+
{
|
| 23576 |
+
"epoch": 0.039350521045551715,
|
| 23577 |
+
"grad_norm": 1.045300006866455,
|
| 23578 |
+
"learning_rate": 3.1417470714321275e-06,
|
| 23579 |
+
"loss": 1.8065,
|
| 23580 |
+
"step": 302100
|
| 23581 |
+
},
|
| 23582 |
+
{
|
| 23583 |
+
"epoch": 0.03962960275509464,
|
| 23584 |
+
"grad_norm": 1.0354883670806885,
|
| 23585 |
+
"learning_rate": 3.1310101674558e-06,
|
| 23586 |
+
"loss": 1.795,
|
| 23587 |
+
"step": 302200
|
| 23588 |
+
},
|
| 23589 |
+
{
|
| 23590 |
+
"epoch": 0.03990868446463756,
|
| 23591 |
+
"grad_norm": 1.07806396484375,
|
| 23592 |
+
"learning_rate": 3.1202904157969865e-06,
|
| 23593 |
+
"loss": 1.7949,
|
| 23594 |
+
"step": 302300
|
| 23595 |
+
},
|
| 23596 |
+
{
|
| 23597 |
+
"epoch": 0.040187766174180475,
|
| 23598 |
+
"grad_norm": 1.0537368059158325,
|
| 23599 |
+
"learning_rate": 3.1095878248634164e-06,
|
| 23600 |
+
"loss": 1.8252,
|
| 23601 |
+
"step": 302400
|
| 23602 |
+
},
|
| 23603 |
+
{
|
| 23604 |
+
"epoch": 0.04046684788372339,
|
| 23605 |
+
"grad_norm": 1.066607117652893,
|
| 23606 |
+
"learning_rate": 3.0989024030493723e-06,
|
| 23607 |
+
"loss": 1.7998,
|
| 23608 |
+
"step": 302500
|
| 23609 |
+
},
|
| 23610 |
+
{
|
| 23611 |
+
"epoch": 0.04074592959326632,
|
| 23612 |
+
"grad_norm": 1.0885719060897827,
|
| 23613 |
+
"learning_rate": 3.0882341587356476e-06,
|
| 23614 |
+
"loss": 1.8006,
|
| 23615 |
+
"step": 302600
|
| 23616 |
+
},
|
| 23617 |
+
{
|
| 23618 |
+
"epoch": 0.041025011302809236,
|
| 23619 |
+
"grad_norm": 1.0701121091842651,
|
| 23620 |
+
"learning_rate": 3.0775831002895774e-06,
|
| 23621 |
+
"loss": 1.8307,
|
| 23622 |
+
"step": 302700
|
| 23623 |
+
},
|
| 23624 |
+
{
|
| 23625 |
+
"epoch": 0.041304093012352154,
|
| 23626 |
+
"grad_norm": 1.045860767364502,
|
| 23627 |
+
"learning_rate": 3.0669492360650196e-06,
|
| 23628 |
+
"loss": 1.8094,
|
| 23629 |
+
"step": 302800
|
| 23630 |
+
},
|
| 23631 |
+
{
|
| 23632 |
+
"epoch": 0.04158317472189508,
|
| 23633 |
+
"grad_norm": 1.0620360374450684,
|
| 23634 |
+
"learning_rate": 3.056332574402346e-06,
|
| 23635 |
+
"loss": 1.8162,
|
| 23636 |
+
"step": 302900
|
| 23637 |
+
},
|
| 23638 |
+
{
|
| 23639 |
+
"epoch": 0.041862256431438,
|
| 23640 |
+
"grad_norm": 1.0758084058761597,
|
| 23641 |
+
"learning_rate": 3.0457331236284166e-06,
|
| 23642 |
+
"loss": 1.7981,
|
| 23643 |
+
"step": 303000
|
| 23644 |
+
},
|
| 23645 |
+
{
|
| 23646 |
+
"epoch": 0.041862256431438,
|
| 23647 |
+
"eval_loss": 2.2137293815612793,
|
| 23648 |
+
"eval_runtime": 51.9224,
|
| 23649 |
+
"eval_samples_per_second": 196.331,
|
| 23650 |
+
"eval_steps_per_second": 1.541,
|
| 23651 |
+
"step": 303000
|
| 23652 |
+
},
|
| 23653 |
+
{
|
| 23654 |
+
"epoch": 0.0002,
|
| 23655 |
+
"grad_norm": 1.0506842136383057,
|
| 23656 |
+
"learning_rate": 1.7259637505723265e-05,
|
| 23657 |
+
"loss": 1.7958,
|
| 23658 |
+
"step": 303100
|
| 23659 |
+
},
|
| 23660 |
+
{
|
| 23661 |
+
"epoch": 0.0004,
|
| 23662 |
+
"grad_norm": 1.034621000289917,
|
| 23663 |
+
"learning_rate": 1.7244552087867325e-05,
|
| 23664 |
+
"loss": 1.8261,
|
| 23665 |
+
"step": 303200
|
| 23666 |
+
},
|
| 23667 |
+
{
|
| 23668 |
+
"epoch": 0.0006,
|
| 23669 |
+
"grad_norm": 1.125115156173706,
|
| 23670 |
+
"learning_rate": 1.7229469793904873e-05,
|
| 23671 |
+
"loss": 1.8172,
|
| 23672 |
+
"step": 303300
|
| 23673 |
+
},
|
| 23674 |
+
{
|
| 23675 |
+
"epoch": 0.0008,
|
| 23676 |
+
"grad_norm": 1.0532312393188477,
|
| 23677 |
+
"learning_rate": 1.7214390629911066e-05,
|
| 23678 |
+
"loss": 1.8165,
|
| 23679 |
+
"step": 303400
|
| 23680 |
+
},
|
| 23681 |
+
{
|
| 23682 |
+
"epoch": 0.001,
|
| 23683 |
+
"grad_norm": 1.0483386516571045,
|
| 23684 |
+
"learning_rate": 1.7199314601959778e-05,
|
| 23685 |
+
"loss": 1.8275,
|
| 23686 |
+
"step": 303500
|
| 23687 |
+
},
|
| 23688 |
+
{
|
| 23689 |
+
"epoch": 0.0012,
|
| 23690 |
+
"grad_norm": 1.0204639434814453,
|
| 23691 |
+
"learning_rate": 1.7184241716123635e-05,
|
| 23692 |
+
"loss": 1.816,
|
| 23693 |
+
"step": 303600
|
| 23694 |
+
},
|
| 23695 |
+
{
|
| 23696 |
+
"epoch": 0.0014,
|
| 23697 |
+
"grad_norm": 1.069264531135559,
|
| 23698 |
+
"learning_rate": 1.7169171978473994e-05,
|
| 23699 |
+
"loss": 1.8174,
|
| 23700 |
+
"step": 303700
|
| 23701 |
+
},
|
| 23702 |
+
{
|
| 23703 |
+
"epoch": 0.0016,
|
| 23704 |
+
"grad_norm": 1.0591576099395752,
|
| 23705 |
+
"learning_rate": 1.715410539508095e-05,
|
| 23706 |
+
"loss": 1.8284,
|
| 23707 |
+
"step": 303800
|
| 23708 |
+
},
|
| 23709 |
+
{
|
| 23710 |
+
"epoch": 0.0018,
|
| 23711 |
+
"grad_norm": 1.086665391921997,
|
| 23712 |
+
"learning_rate": 1.7139041972013304e-05,
|
| 23713 |
+
"loss": 1.8279,
|
| 23714 |
+
"step": 303900
|
| 23715 |
+
},
|
| 23716 |
+
{
|
| 23717 |
+
"epoch": 0.002,
|
| 23718 |
+
"grad_norm": 1.094480037689209,
|
| 23719 |
+
"learning_rate": 1.712398171533862e-05,
|
| 23720 |
+
"loss": 1.8144,
|
| 23721 |
+
"step": 304000
|
| 23722 |
+
},
|
| 23723 |
+
{
|
| 23724 |
+
"epoch": 0.002,
|
| 23725 |
+
"eval_loss": 2.229548931121826,
|
| 23726 |
+
"eval_runtime": 52.0986,
|
| 23727 |
+
"eval_samples_per_second": 195.667,
|
| 23728 |
+
"eval_steps_per_second": 1.536,
|
| 23729 |
+
"step": 304000
|
| 23730 |
+
},
|
| 23731 |
+
+ {
+ "epoch": 0.0022,
+ "grad_norm": 1.0918675661087036,
+ "learning_rate": 1.710892463112316e-05,
+ "loss": 1.8137,
+ "step": 304100
+ },
+ {
+ "epoch": 0.0024,
+ "grad_norm": 1.0117205381393433,
+ "learning_rate": 1.709387072543191e-05,
+ "loss": 1.8065,
+ "step": 304200
+ },
+ {
+ "epoch": 0.0026,
+ "grad_norm": 1.1326615810394287,
+ "learning_rate": 1.7078820004328587e-05,
+ "loss": 1.814,
+ "step": 304300
+ },
+ {
+ "epoch": 0.0028,
+ "grad_norm": 1.048861026763916,
+ "learning_rate": 1.7063772473875616e-05,
+ "loss": 1.8061,
+ "step": 304400
+ },
+ {
+ "epoch": 0.003,
+ "grad_norm": 1.0585246086120605,
+ "learning_rate": 1.7048728140134152e-05,
+ "loss": 1.8293,
+ "step": 304500
+ },
+ {
+ "epoch": 0.0032,
+ "grad_norm": 1.0181670188903809,
+ "learning_rate": 1.7033687009164033e-05,
+ "loss": 1.8163,
+ "step": 304600
+ },
+ {
+ "epoch": 0.0034,
+ "grad_norm": 1.0025696754455566,
+ "learning_rate": 1.701864908702384e-05,
+ "loss": 1.8058,
+ "step": 304700
+ },
+ {
+ "epoch": 0.0036,
+ "grad_norm": 1.0825532674789429,
+ "learning_rate": 1.700361437977084e-05,
+ "loss": 1.818,
+ "step": 304800
+ },
+ {
+ "epoch": 0.0038,
+ "grad_norm": 1.0389013290405273,
+ "learning_rate": 1.6988582893461008e-05,
+ "loss": 1.8142,
+ "step": 304900
+ },
+ {
+ "epoch": 0.004,
+ "grad_norm": 1.0209424495697021,
+ "learning_rate": 1.697355463414903e-05,
+ "loss": 1.8103,
+ "step": 305000
+ },
+ {
+ "epoch": 0.004,
+ "eval_loss": 2.230199098587036,
+ "eval_runtime": 51.6157,
+ "eval_samples_per_second": 197.498,
+ "eval_steps_per_second": 1.55,
+ "step": 305000
+ },
+ {
+ "epoch": 0.0042,
+ "grad_norm": 1.085379958152771,
+ "learning_rate": 1.695852960788829e-05,
+ "loss": 1.8192,
+ "step": 305100
+ },
+ {
+ "epoch": 0.0044,
+ "grad_norm": 1.0386351346969604,
+ "learning_rate": 1.6943507820730854e-05,
+ "loss": 1.8061,
+ "step": 305200
+ },
+ {
+ "epoch": 0.0046,
+ "grad_norm": 1.0565484762191772,
+ "learning_rate": 1.692848927872751e-05,
+ "loss": 1.8081,
+ "step": 305300
+ },
+ {
+ "epoch": 0.0048,
+ "grad_norm": 1.0819813013076782,
+ "learning_rate": 1.6913473987927713e-05,
+ "loss": 1.8158,
+ "step": 305400
+ },
+ {
+ "epoch": 0.005,
+ "grad_norm": 1.0319418907165527,
+ "learning_rate": 1.6898461954379636e-05,
+ "loss": 1.7954,
+ "step": 305500
+ },
+ {
+ "epoch": 0.0052,
+ "grad_norm": 1.0530176162719727,
+ "learning_rate": 1.6883453184130116e-05,
+ "loss": 1.8046,
+ "step": 305600
+ },
+ {
+ "epoch": 0.0054,
+ "grad_norm": 1.0865267515182495,
+ "learning_rate": 1.686844768322467e-05,
+ "loss": 1.7917,
+ "step": 305700
+ },
+ {
+ "epoch": 0.0056,
+ "grad_norm": 1.027178406715393,
+ "learning_rate": 1.6853445457707538e-05,
+ "loss": 1.7988,
+ "step": 305800
+ },
+ {
+ "epoch": 0.0058,
+ "grad_norm": 1.0627230405807495,
+ "learning_rate": 1.6838446513621593e-05,
+ "loss": 1.7954,
+ "step": 305900
+ },
+ {
+ "epoch": 0.006,
+ "grad_norm": 1.059670329093933,
+ "learning_rate": 1.6823450857008423e-05,
+ "loss": 1.7974,
+ "step": 306000
+ },
+ {
+ "epoch": 0.006,
+ "eval_loss": 2.230013608932495,
+ "eval_runtime": 51.6205,
+ "eval_samples_per_second": 197.48,
+ "eval_steps_per_second": 1.55,
+ "step": 306000
+ },
+ {
+ "epoch": 0.0062,
+ "grad_norm": 1.0403988361358643,
+ "learning_rate": 1.6808458493908258e-05,
+ "loss": 1.7976,
+ "step": 306100
+ },
+ {
+ "epoch": 0.0064,
+ "grad_norm": 1.0143063068389893,
+ "learning_rate": 1.6793469430360042e-05,
+ "loss": 1.7949,
+ "step": 306200
+ },
+ {
+ "epoch": 0.0066,
+ "grad_norm": 1.1919389963150024,
+ "learning_rate": 1.6778483672401356e-05,
+ "loss": 1.8018,
+ "step": 306300
+ },
+ {
+ "epoch": 0.0068,
+ "grad_norm": 1.06490957736969,
+ "learning_rate": 1.6763501226068465e-05,
+ "loss": 1.8087,
+ "step": 306400
+ },
+ {
+ "epoch": 0.007,
+ "grad_norm": 1.0884573459625244,
+ "learning_rate": 1.674852209739629e-05,
+ "loss": 1.8177,
+ "step": 306500
+ },
+ {
+ "epoch": 0.0072,
+ "grad_norm": 1.0523546934127808,
+ "learning_rate": 1.6733546292418434e-05,
+ "loss": 1.7789,
+ "step": 306600
+ },
+ {
+ "epoch": 0.0074,
+ "grad_norm": 1.0929498672485352,
+ "learning_rate": 1.6718573817167137e-05,
+ "loss": 1.8022,
+ "step": 306700
+ },
+ {
+ "epoch": 0.0076,
+ "grad_norm": 1.0335514545440674,
+ "learning_rate": 1.6703604677673322e-05,
+ "loss": 1.7912,
+ "step": 306800
+ },
+ {
+ "epoch": 0.0078,
+ "grad_norm": 1.0258134603500366,
+ "learning_rate": 1.6688638879966546e-05,
+ "loss": 1.7952,
+ "step": 306900
+ },
+ {
+ "epoch": 0.008,
+ "grad_norm": 1.0420570373535156,
+ "learning_rate": 1.6673676430075036e-05,
+ "loss": 1.7981,
+ "step": 307000
+ },
+ {
+ "epoch": 0.008,
+ "eval_loss": 2.228384256362915,
+ "eval_runtime": 51.6428,
+ "eval_samples_per_second": 197.395,
+ "eval_steps_per_second": 1.549,
+ "step": 307000
+ },
+ {
+ "epoch": 0.0082,
+ "grad_norm": 1.065299391746521,
+ "learning_rate": 1.6658717334025664e-05,
+ "loss": 1.8051,
+ "step": 307100
+ },
+ {
+ "epoch": 0.0084,
+ "grad_norm": 1.015187382698059,
+ "learning_rate": 1.6643761597843953e-05,
+ "loss": 1.8016,
+ "step": 307200
+ },
+ {
+ "epoch": 0.0086,
+ "grad_norm": 1.047338843345642,
+ "learning_rate": 1.6628809227554077e-05,
+ "loss": 1.7974,
+ "step": 307300
+ },
+ {
+ "epoch": 0.0088,
+ "grad_norm": 1.0116043090820312,
+ "learning_rate": 1.6613860229178836e-05,
+ "loss": 1.793,
+ "step": 307400
+ },
+ {
+ "epoch": 0.009,
+ "grad_norm": 1.0261743068695068,
+ "learning_rate": 1.6598914608739695e-05,
+ "loss": 1.789,
+ "step": 307500
+ },
+ {
+ "epoch": 0.0092,
+ "grad_norm": 1.0221142768859863,
+ "learning_rate": 1.658397237225674e-05,
+ "loss": 1.7865,
+ "step": 307600
+ },
+ {
+ "epoch": 0.0094,
+ "grad_norm": 1.050794005393982,
+ "learning_rate": 1.6569033525748712e-05,
+ "loss": 1.7725,
+ "step": 307700
+ },
+ {
+ "epoch": 0.0096,
+ "grad_norm": 1.1043586730957031,
+ "learning_rate": 1.6554098075232967e-05,
+ "loss": 1.7772,
+ "step": 307800
+ },
+ {
+ "epoch": 0.0098,
+ "grad_norm": 1.0293883085250854,
+ "learning_rate": 1.6539166026725515e-05,
+ "loss": 1.8076,
+ "step": 307900
+ },
+ {
+ "epoch": 0.01,
+ "grad_norm": 1.0498391389846802,
+ "learning_rate": 1.6524237386240964e-05,
+ "loss": 1.7978,
+ "step": 308000
+ },
+ {
+ "epoch": 0.01,
+ "eval_loss": 2.2343056201934814,
+ "eval_runtime": 51.6864,
+ "eval_samples_per_second": 197.228,
+ "eval_steps_per_second": 1.548,
+ "step": 308000
+ },
+ {
+ "epoch": 0.0102,
+ "grad_norm": 1.0795516967773438,
+ "learning_rate": 1.6509312159792594e-05,
+ "loss": 1.8164,
+ "step": 308100
+ },
+ {
+ "epoch": 0.0104,
+ "grad_norm": 1.0950493812561035,
+ "learning_rate": 1.6494390353392258e-05,
+ "loss": 1.7901,
+ "step": 308200
+ },
+ {
+ "epoch": 0.0106,
+ "grad_norm": 1.0679363012313843,
+ "learning_rate": 1.6479471973050482e-05,
+ "loss": 1.8094,
+ "step": 308300
+ },
+ {
+ "epoch": 0.0108,
+ "grad_norm": 1.0638396739959717,
+ "learning_rate": 1.6464557024776365e-05,
+ "loss": 1.7981,
+ "step": 308400
+ },
+ {
+ "epoch": 0.011,
+ "grad_norm": 1.0541220903396606,
+ "learning_rate": 1.6449645514577668e-05,
+ "loss": 1.7955,
+ "step": 308500
+ },
+ {
+ "epoch": 0.0112,
+ "grad_norm": 1.0506057739257812,
+ "learning_rate": 1.6434737448460725e-05,
+ "loss": 1.7793,
+ "step": 308600
+ },
+ {
+ "epoch": 0.0114,
+ "grad_norm": 1.0873394012451172,
+ "learning_rate": 1.6419832832430522e-05,
+ "loss": 1.7941,
+ "step": 308700
+ },
+ {
+ "epoch": 0.0116,
+ "grad_norm": 1.0632107257843018,
+ "learning_rate": 1.6404931672490625e-05,
+ "loss": 1.7861,
+ "step": 308800
+ },
+ {
+ "epoch": 0.0118,
+ "grad_norm": 1.1098015308380127,
+ "learning_rate": 1.6390033974643222e-05,
+ "loss": 1.7709,
+ "step": 308900
+ },
+ {
+ "epoch": 0.012,
+ "grad_norm": 1.046675682067871,
+ "learning_rate": 1.6375139744889107e-05,
+ "loss": 1.7811,
+ "step": 309000
+ },
+ {
+ "epoch": 0.012,
+ "eval_loss": 2.2420222759246826,
+ "eval_runtime": 51.584,
+ "eval_samples_per_second": 197.619,
+ "eval_steps_per_second": 1.551,
+ "step": 309000
+ },
+ {
+ "epoch": 0.0122,
+ "grad_norm": 1.047875165939331,
+ "learning_rate": 1.6360248989227666e-05,
+ "loss": 1.7818,
+ "step": 309100
+ },
+ {
+ "epoch": 0.0124,
+ "grad_norm": 1.0503313541412354,
+ "learning_rate": 1.6345361713656904e-05,
+ "loss": 1.7718,
+ "step": 309200
+ },
+ {
+ "epoch": 0.0126,
+ "grad_norm": 1.046026587486267,
+ "learning_rate": 1.6330477924173403e-05,
+ "loss": 1.7518,
+ "step": 309300
+ },
+ {
+ "epoch": 0.0128,
+ "grad_norm": 1.0461571216583252,
+ "learning_rate": 1.6315597626772365e-05,
+ "loss": 1.7751,
+ "step": 309400
+ },
+ {
+ "epoch": 0.013,
+ "grad_norm": 1.0191349983215332,
+ "learning_rate": 1.6300720827447556e-05,
+ "loss": 1.7724,
+ "step": 309500
+ },
+ {
+ "epoch": 0.0132,
+ "grad_norm": 1.0420078039169312,
+ "learning_rate": 1.6285847532191364e-05,
+ "loss": 1.7394,
+ "step": 309600
+ },
+ {
+ "epoch": 0.0134,
+ "grad_norm": 1.0415441989898682,
+ "learning_rate": 1.627097774699474e-05,
+ "loss": 1.7405,
+ "step": 309700
+ },
+ {
+ "epoch": 0.0136,
+ "grad_norm": 1.0861761569976807,
+ "learning_rate": 1.625611147784724e-05,
+ "loss": 1.7572,
+ "step": 309800
+ },
+ {
+ "epoch": 0.0138,
+ "grad_norm": 1.042179822921753,
+ "learning_rate": 1.6241248730736985e-05,
+ "loss": 1.7634,
+ "step": 309900
+ },
+ {
+ "epoch": 0.014,
+ "grad_norm": 1.0887514352798462,
+ "learning_rate": 1.6226389511650697e-05,
+ "loss": 1.7487,
+ "step": 310000
+ },
+ {
+ "epoch": 0.014,
+ "eval_loss": 2.244732618331909,
+ "eval_runtime": 51.6991,
+ "eval_samples_per_second": 197.18,
+ "eval_steps_per_second": 1.547,
+ "step": 310000
+ },
+ {
+ "epoch": 0.0142,
+ "grad_norm": 1.0510177612304688,
+ "learning_rate": 1.6211533826573662e-05,
+ "loss": 1.7426,
+ "step": 310100
+ },
+ {
+ "epoch": 0.0144,
+ "grad_norm": 0.9902233481407166,
+ "learning_rate": 1.6196681681489755e-05,
+ "loss": 1.7452,
+ "step": 310200
+ },
+ {
+ "epoch": 0.0146,
+ "grad_norm": 1.0358948707580566,
+ "learning_rate": 1.6181833082381413e-05,
+ "loss": 1.7292,
+ "step": 310300
+ },
+ {
+ "epoch": 0.0148,
+ "grad_norm": 1.0080764293670654,
+ "learning_rate": 1.6166988035229652e-05,
+ "loss": 1.7368,
+ "step": 310400
+ },
+ {
+ "epoch": 0.015,
+ "grad_norm": 1.0920326709747314,
+ "learning_rate": 1.6152146546014053e-05,
+ "loss": 1.7186,
+ "step": 310500
+ },
+ {
+ "epoch": 0.0152,
+ "grad_norm": 1.0890278816223145,
+ "learning_rate": 1.6137308620712765e-05,
+ "loss": 1.7179,
+ "step": 310600
+ },
+ {
+ "epoch": 0.0154,
+ "grad_norm": 1.0208715200424194,
+ "learning_rate": 1.612247426530251e-05,
+ "loss": 1.744,
+ "step": 310700
+ },
+ {
+ "epoch": 0.0156,
+ "grad_norm": 0.9500866532325745,
+ "learning_rate": 1.610764348575856e-05,
+ "loss": 1.6606,
+ "step": 310800
+ },
+ {
+ "epoch": 0.0158,
+ "grad_norm": 0.9557023048400879,
+ "learning_rate": 1.6092816288054746e-05,
+ "loss": 1.4109,
+ "step": 310900
+ },
+ {
+ "epoch": 0.016,
+ "grad_norm": 0.9185681343078613,
+ "learning_rate": 1.6077992678163467e-05,
+ "loss": 1.3687,
+ "step": 311000
+ },
+ {
+ "epoch": 0.016,
+ "eval_loss": 2.290562629699707,
+ "eval_runtime": 51.8115,
+ "eval_samples_per_second": 196.752,
+ "eval_steps_per_second": 1.544,
+ "step": 311000
+ },
+ {
+ "epoch": 0.0162,
+ "grad_norm": 0.9145857691764832,
+ "learning_rate": 1.6063172662055665e-05,
+ "loss": 1.3382,
+ "step": 311100
+ },
+ {
+ "epoch": 0.0164,
+ "grad_norm": 0.9351733922958374,
+ "learning_rate": 1.6048356245700856e-05,
+ "loss": 1.3208,
+ "step": 311200
+ },
+ {
+ "epoch": 0.0166,
+ "grad_norm": 0.9079789519309998,
+ "learning_rate": 1.603354343506707e-05,
+ "loss": 1.2985,
+ "step": 311300
+ },
+ {
+ "epoch": 0.0168,
+ "grad_norm": 1.0671257972717285,
+ "learning_rate": 1.6018734236120926e-05,
+ "loss": 1.3041,
+ "step": 311400
+ },
+ {
+ "epoch": 0.017,
+ "grad_norm": 0.8702555894851685,
+ "learning_rate": 1.600392865482755e-05,
+ "loss": 1.278,
+ "step": 311500
+ },
+ {
+ "epoch": 0.0172,
+ "grad_norm": 0.9119271039962769,
+ "learning_rate": 1.598912669715064e-05,
+ "loss": 1.2662,
+ "step": 311600
+ },
+ {
+ "epoch": 0.0174,
+ "grad_norm": 0.8721778988838196,
+ "learning_rate": 1.5974328369052415e-05,
+ "loss": 1.2713,
+ "step": 311700
+ },
+ {
+ "epoch": 0.0176,
+ "grad_norm": 0.9360621571540833,
+ "learning_rate": 1.5959533676493647e-05,
+ "loss": 1.2523,
+ "step": 311800
+ },
+ {
+ "epoch": 0.0178,
+ "grad_norm": 0.9057286381721497,
+ "learning_rate": 1.5944742625433633e-05,
+ "loss": 1.2308,
+ "step": 311900
+ },
+ {
+ "epoch": 0.018,
+ "grad_norm": 0.874999463558197,
+ "learning_rate": 1.5929955221830202e-05,
+ "loss": 1.2274,
+ "step": 312000
+ },
+ {
+ "epoch": 0.018,
+ "eval_loss": 2.374765634536743,
+ "eval_runtime": 51.8773,
+ "eval_samples_per_second": 196.502,
+ "eval_steps_per_second": 1.542,
+ "step": 312000
+ },
+ {
+ "epoch": 0.0182,
+ "grad_norm": 0.9309009313583374,
+ "learning_rate": 1.591517147163973e-05,
+ "loss": 1.224,
+ "step": 312100
+ },
+ {
+ "epoch": 0.0184,
+ "grad_norm": 0.8504728674888611,
+ "learning_rate": 1.59003913808171e-05,
+ "loss": 1.2055,
+ "step": 312200
+ },
+ {
+ "epoch": 0.0186,
+ "grad_norm": 0.9231265783309937,
+ "learning_rate": 1.588561495531573e-05,
+ "loss": 1.2074,
+ "step": 312300
+ },
+ {
+ "epoch": 0.0188,
+ "grad_norm": 0.9524025321006775,
+ "learning_rate": 1.587084220108757e-05,
+ "loss": 1.1945,
+ "step": 312400
+ },
+ {
+ "epoch": 0.019,
+ "grad_norm": 0.8538132309913635,
+ "learning_rate": 1.585607312408308e-05,
+ "loss": 1.202,
+ "step": 312500
+ },
+ {
+ "epoch": 0.0192,
+ "grad_norm": 1.1738858222961426,
+ "learning_rate": 1.5841307730251237e-05,
+ "loss": 1.1787,
+ "step": 312600
+ },
+ {
+ "epoch": 0.0194,
+ "grad_norm": 0.9254825711250305,
+ "learning_rate": 1.5826546025539552e-05,
+ "loss": 1.1737,
+ "step": 312700
+ },
+ {
+ "epoch": 0.0196,
+ "grad_norm": 0.8884557485580444,
+ "learning_rate": 1.5811788015894025e-05,
+ "loss": 1.1715,
+ "step": 312800
+ },
+ {
+ "epoch": 0.0198,
+ "grad_norm": 0.8768421411514282,
+ "learning_rate": 1.579703370725919e-05,
+ "loss": 1.1701,
+ "step": 312900
+ },
+ {
+ "epoch": 0.02,
+ "grad_norm": 0.9079160690307617,
+ "learning_rate": 1.5782283105578076e-05,
+ "loss": 1.1533,
+ "step": 313000
+ },
+ {
+ "epoch": 0.02,
+ "eval_loss": 2.40177059173584,
+ "eval_runtime": 52.039,
+ "eval_samples_per_second": 195.892,
+ "eval_steps_per_second": 1.537,
+ "step": 313000
}
],
"logging_steps": 100,
+ "max_steps": 500000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 1000,
...
"attributes": {}
}
},
+ "total_flos": 2.731626434710733e+19,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
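The trainer_state.json diffed above keeps its running metrics in a "log_history" list: training entries carry "loss", "grad_norm" and "learning_rate" every logging_steps (100), while evaluation entries carry "eval_loss" and throughput figures every 1000 steps. A minimal sketch for plotting those curves from a downloaded checkpoint; the checkpoint-313000/ path and the matplotlib dependency are assumptions, not part of this commit:

```python
import json

import matplotlib.pyplot as plt

# Hypothetical local path to this checkpoint's trainer_state.json.
with open("checkpoint-313000/trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]
train = [(e["step"], e["loss"]) for e in history if "loss" in e]
evals = [(e["step"], e["eval_loss"]) for e in history if "eval_loss" in e]

# Training loss is logged every logging_steps (100); eval loss every 1000 steps.
plt.plot([s for s, _ in train], [v for _, v in train], label="train loss")
plt.plot([s for s, _ in evals], [v for _, v in evals], label="eval loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.savefig("loss_curves.png")
```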
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:f0dc725694ec7d7e0bc3b408e66c887f704ad47bb8c1c9fcffc5533d57950135
size 5777