Training in progress, step 22491, checkpoint
Browse files
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1856040378
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e63155d8a8b26707565ffcd0b461dea5ebc052fa146ea4fa7e98b26ada3cad43
|
| 3 |
size 1856040378
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 928000378
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee8333b033064df00b889db112a3d891431c19c228f04607ae854217d617b7a2
|
| 3 |
size 928000378
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b02b5189317f107b47761a33d54767287c36086e7e62e8c0878c096bde5797b4
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61686b58ac0c30ea6e7d706b512b9f5ec51fe76d0d476c61783415414e98dcf0
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 1.4320533275604248,
|
| 3 |
"best_model_checkpoint": "model/chessformer-3/checkpoint-22000",
|
| 4 |
-
"epoch": 0
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -77359,6 +77359,1721 @@
|
|
| 77359 |
"eval_samples_per_second": 554.141,
|
| 77360 |
"eval_steps_per_second": 69.288,
|
| 77361 |
"step": 22000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77362 |
}
|
| 77363 |
],
|
| 77364 |
"logging_steps": 2,
|
|
@@ -77373,12 +79088,12 @@
|
|
| 77373 |
"should_evaluate": false,
|
| 77374 |
"should_log": false,
|
| 77375 |
"should_save": true,
|
| 77376 |
-
"should_training_stop":
|
| 77377 |
},
|
| 77378 |
"attributes": {}
|
| 77379 |
}
|
| 77380 |
},
|
| 77381 |
-
"total_flos": 4.
|
| 77382 |
"train_batch_size": 768,
|
| 77383 |
"trial_name": null,
|
| 77384 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 1.4320533275604248,
|
| 3 |
"best_model_checkpoint": "model/chessformer-3/checkpoint-22000",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 22491,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 77359 |
"eval_samples_per_second": 554.141,
|
| 77360 |
"eval_steps_per_second": 69.288,
|
| 77361 |
"step": 22000
|
| 77362 |
+
},
|
| 77363 |
+
{
|
| 77364 |
+
"epoch": 0.9782579698546086,
|
| 77365 |
+
"grad_norm": 0.057963717728853226,
|
| 77366 |
+
"learning_rate": 1.2718103874865827e-06,
|
| 77367 |
+
"loss": 1.4517,
|
| 77368 |
+
"step": 22002
|
| 77369 |
+
},
|
| 77370 |
+
{
|
| 77371 |
+
"epoch": 0.9783468943132808,
|
| 77372 |
+
"grad_norm": 0.05782179906964302,
|
| 77373 |
+
"learning_rate": 1.261411450722838e-06,
|
| 77374 |
+
"loss": 1.4397,
|
| 77375 |
+
"step": 22004
|
| 77376 |
+
},
|
| 77377 |
+
{
|
| 77378 |
+
"epoch": 0.9784358187719532,
|
| 77379 |
+
"grad_norm": 0.05823444202542305,
|
| 77380 |
+
"learning_rate": 1.2510551483315146e-06,
|
| 77381 |
+
"loss": 1.4472,
|
| 77382 |
+
"step": 22006
|
| 77383 |
+
},
|
| 77384 |
+
{
|
| 77385 |
+
"epoch": 0.9785247432306255,
|
| 77386 |
+
"grad_norm": 0.057919129729270935,
|
| 77387 |
+
"learning_rate": 1.2407414811979601e-06,
|
| 77388 |
+
"loss": 1.4441,
|
| 77389 |
+
"step": 22008
|
| 77390 |
+
},
|
| 77391 |
+
{
|
| 77392 |
+
"epoch": 0.9786136676892979,
|
| 77393 |
+
"grad_norm": 0.058388952165842056,
|
| 77394 |
+
"learning_rate": 1.2304704502037467e-06,
|
| 77395 |
+
"loss": 1.4497,
|
| 77396 |
+
"step": 22010
|
| 77397 |
+
},
|
| 77398 |
+
{
|
| 77399 |
+
"epoch": 0.9787025921479703,
|
| 77400 |
+
"grad_norm": 0.057490069419145584,
|
| 77401 |
+
"learning_rate": 1.2202420562268946e-06,
|
| 77402 |
+
"loss": 1.4493,
|
| 77403 |
+
"step": 22012
|
| 77404 |
+
},
|
| 77405 |
+
{
|
| 77406 |
+
"epoch": 0.9787915166066427,
|
| 77407 |
+
"grad_norm": 0.05721807852387428,
|
| 77408 |
+
"learning_rate": 1.2100563001418708e-06,
|
| 77409 |
+
"loss": 1.4464,
|
| 77410 |
+
"step": 22014
|
| 77411 |
+
},
|
| 77412 |
+
{
|
| 77413 |
+
"epoch": 0.978880441065315,
|
| 77414 |
+
"grad_norm": 0.057543035596609116,
|
| 77415 |
+
"learning_rate": 1.1999131828192567e-06,
|
| 77416 |
+
"loss": 1.4467,
|
| 77417 |
+
"step": 22016
|
| 77418 |
+
},
|
| 77419 |
+
{
|
| 77420 |
+
"epoch": 0.9789693655239874,
|
| 77421 |
+
"grad_norm": 0.057324040681123734,
|
| 77422 |
+
"learning_rate": 1.1898127051262476e-06,
|
| 77423 |
+
"loss": 1.4435,
|
| 77424 |
+
"step": 22018
|
| 77425 |
+
},
|
| 77426 |
+
{
|
| 77427 |
+
"epoch": 0.9790582899826598,
|
| 77428 |
+
"grad_norm": 0.05752379819750786,
|
| 77429 |
+
"learning_rate": 1.1797548679262638e-06,
|
| 77430 |
+
"loss": 1.4421,
|
| 77431 |
+
"step": 22020
|
| 77432 |
+
},
|
| 77433 |
+
{
|
| 77434 |
+
"epoch": 0.9791472144413321,
|
| 77435 |
+
"grad_norm": 0.05681290104985237,
|
| 77436 |
+
"learning_rate": 1.1697396720790065e-06,
|
| 77437 |
+
"loss": 1.4482,
|
| 77438 |
+
"step": 22022
|
| 77439 |
+
},
|
| 77440 |
+
{
|
| 77441 |
+
"epoch": 0.9792361389000045,
|
| 77442 |
+
"grad_norm": 0.05741885304450989,
|
| 77443 |
+
"learning_rate": 1.15976711844068e-06,
|
| 77444 |
+
"loss": 1.4459,
|
| 77445 |
+
"step": 22024
|
| 77446 |
+
},
|
| 77447 |
+
{
|
| 77448 |
+
"epoch": 0.9793250633586768,
|
| 77449 |
+
"grad_norm": 0.05764324218034744,
|
| 77450 |
+
"learning_rate": 1.1498372078638243e-06,
|
| 77451 |
+
"loss": 1.4434,
|
| 77452 |
+
"step": 22026
|
| 77453 |
+
},
|
| 77454 |
+
{
|
| 77455 |
+
"epoch": 0.9794139878173491,
|
| 77456 |
+
"grad_norm": 0.05784038081765175,
|
| 77457 |
+
"learning_rate": 1.1399499411972048e-06,
|
| 77458 |
+
"loss": 1.4504,
|
| 77459 |
+
"step": 22028
|
| 77460 |
+
},
|
| 77461 |
+
{
|
| 77462 |
+
"epoch": 0.9795029122760215,
|
| 77463 |
+
"grad_norm": 0.05776010826230049,
|
| 77464 |
+
"learning_rate": 1.13010531928609e-06,
|
| 77465 |
+
"loss": 1.4494,
|
| 77466 |
+
"step": 22030
|
| 77467 |
+
},
|
| 77468 |
+
{
|
| 77469 |
+
"epoch": 0.9795918367346939,
|
| 77470 |
+
"grad_norm": 0.05751819536089897,
|
| 77471 |
+
"learning_rate": 1.1203033429719734e-06,
|
| 77472 |
+
"loss": 1.4483,
|
| 77473 |
+
"step": 22032
|
| 77474 |
+
},
|
| 77475 |
+
{
|
| 77476 |
+
"epoch": 0.9796807611933662,
|
| 77477 |
+
"grad_norm": 0.05655774101614952,
|
| 77478 |
+
"learning_rate": 1.1105440130929067e-06,
|
| 77479 |
+
"loss": 1.4474,
|
| 77480 |
+
"step": 22034
|
| 77481 |
+
},
|
| 77482 |
+
{
|
| 77483 |
+
"epoch": 0.9797696856520386,
|
| 77484 |
+
"grad_norm": 0.0575784407556057,
|
| 77485 |
+
"learning_rate": 1.1008273304830008e-06,
|
| 77486 |
+
"loss": 1.4497,
|
| 77487 |
+
"step": 22036
|
| 77488 |
+
},
|
| 77489 |
+
{
|
| 77490 |
+
"epoch": 0.979858610110711,
|
| 77491 |
+
"grad_norm": 0.05941782891750336,
|
| 77492 |
+
"learning_rate": 1.0911532959729797e-06,
|
| 77493 |
+
"loss": 1.45,
|
| 77494 |
+
"step": 22038
|
| 77495 |
+
},
|
| 77496 |
+
{
|
| 77497 |
+
"epoch": 0.9799475345693833,
|
| 77498 |
+
"grad_norm": 0.05766258016228676,
|
| 77499 |
+
"learning_rate": 1.0815219103897933e-06,
|
| 77500 |
+
"loss": 1.4442,
|
| 77501 |
+
"step": 22040
|
| 77502 |
+
},
|
| 77503 |
+
{
|
| 77504 |
+
"epoch": 0.9800364590280557,
|
| 77505 |
+
"grad_norm": 0.056954920291900635,
|
| 77506 |
+
"learning_rate": 1.0719331745567828e-06,
|
| 77507 |
+
"loss": 1.4441,
|
| 77508 |
+
"step": 22042
|
| 77509 |
+
},
|
| 77510 |
+
{
|
| 77511 |
+
"epoch": 0.9801253834867281,
|
| 77512 |
+
"grad_norm": 0.05810870602726936,
|
| 77513 |
+
"learning_rate": 1.0623870892936261e-06,
|
| 77514 |
+
"loss": 1.4414,
|
| 77515 |
+
"step": 22044
|
| 77516 |
+
},
|
| 77517 |
+
{
|
| 77518 |
+
"epoch": 0.9802143079454004,
|
| 77519 |
+
"grad_norm": 0.05758174881339073,
|
| 77520 |
+
"learning_rate": 1.0528836554163368e-06,
|
| 77521 |
+
"loss": 1.4461,
|
| 77522 |
+
"step": 22046
|
| 77523 |
+
},
|
| 77524 |
+
{
|
| 77525 |
+
"epoch": 0.9803032324040727,
|
| 77526 |
+
"grad_norm": 0.05731727182865143,
|
| 77527 |
+
"learning_rate": 1.0434228737373764e-06,
|
| 77528 |
+
"loss": 1.4426,
|
| 77529 |
+
"step": 22048
|
| 77530 |
+
},
|
| 77531 |
+
{
|
| 77532 |
+
"epoch": 0.9803921568627451,
|
| 77533 |
+
"grad_norm": 0.05725093558430672,
|
| 77534 |
+
"learning_rate": 1.034004745065431e-06,
|
| 77535 |
+
"loss": 1.4474,
|
| 77536 |
+
"step": 22050
|
| 77537 |
+
},
|
| 77538 |
+
{
|
| 77539 |
+
"epoch": 0.9804810813214174,
|
| 77540 |
+
"grad_norm": 0.0574093721807003,
|
| 77541 |
+
"learning_rate": 1.0246292702056348e-06,
|
| 77542 |
+
"loss": 1.4473,
|
| 77543 |
+
"step": 22052
|
| 77544 |
+
},
|
| 77545 |
+
{
|
| 77546 |
+
"epoch": 0.9805700057800898,
|
| 77547 |
+
"grad_norm": 0.057217229157686234,
|
| 77548 |
+
"learning_rate": 1.0152964499594575e-06,
|
| 77549 |
+
"loss": 1.4416,
|
| 77550 |
+
"step": 22054
|
| 77551 |
+
},
|
| 77552 |
+
{
|
| 77553 |
+
"epoch": 0.9806589302387622,
|
| 77554 |
+
"grad_norm": 0.05822160094976425,
|
| 77555 |
+
"learning_rate": 1.0060062851247053e-06,
|
| 77556 |
+
"loss": 1.4464,
|
| 77557 |
+
"step": 22056
|
| 77558 |
+
},
|
| 77559 |
+
{
|
| 77560 |
+
"epoch": 0.9807478546974345,
|
| 77561 |
+
"grad_norm": 0.057691022753715515,
|
| 77562 |
+
"learning_rate": 9.967587764955211e-07,
|
| 77563 |
+
"loss": 1.4492,
|
| 77564 |
+
"step": 22058
|
| 77565 |
+
},
|
| 77566 |
+
{
|
| 77567 |
+
"epoch": 0.9808367791561069,
|
| 77568 |
+
"grad_norm": 0.056380316615104675,
|
| 77569 |
+
"learning_rate": 9.875539248624388e-07,
|
| 77570 |
+
"loss": 1.4399,
|
| 77571 |
+
"step": 22060
|
| 77572 |
+
},
|
| 77573 |
+
{
|
| 77574 |
+
"epoch": 0.9809257036147793,
|
| 77575 |
+
"grad_norm": 0.058779843151569366,
|
| 77576 |
+
"learning_rate": 9.783917310122737e-07,
|
| 77577 |
+
"loss": 1.4478,
|
| 77578 |
+
"step": 22062
|
| 77579 |
+
},
|
| 77580 |
+
{
|
| 77581 |
+
"epoch": 0.9810146280734516,
|
| 77582 |
+
"grad_norm": 0.058194488286972046,
|
| 77583 |
+
"learning_rate": 9.692721957283435e-07,
|
| 77584 |
+
"loss": 1.4477,
|
| 77585 |
+
"step": 22064
|
| 77586 |
+
},
|
| 77587 |
+
{
|
| 77588 |
+
"epoch": 0.981103552532124,
|
| 77589 |
+
"grad_norm": 0.05809153616428375,
|
| 77590 |
+
"learning_rate": 9.601953197901913e-07,
|
| 77591 |
+
"loss": 1.4381,
|
| 77592 |
+
"step": 22066
|
| 77593 |
+
},
|
| 77594 |
+
{
|
| 77595 |
+
"epoch": 0.9811924769907964,
|
| 77596 |
+
"grad_norm": 0.05672544613480568,
|
| 77597 |
+
"learning_rate": 9.511611039737522e-07,
|
| 77598 |
+
"loss": 1.4442,
|
| 77599 |
+
"step": 22068
|
| 77600 |
+
},
|
| 77601 |
+
{
|
| 77602 |
+
"epoch": 0.9812814014494686,
|
| 77603 |
+
"grad_norm": 0.058622874319553375,
|
| 77604 |
+
"learning_rate": 9.421695490512416e-07,
|
| 77605 |
+
"loss": 1.4394,
|
| 77606 |
+
"step": 22070
|
| 77607 |
+
},
|
| 77608 |
+
{
|
| 77609 |
+
"epoch": 0.981370325908141,
|
| 77610 |
+
"grad_norm": 0.0575464628636837,
|
| 77611 |
+
"learning_rate": 9.332206557914336e-07,
|
| 77612 |
+
"loss": 1.447,
|
| 77613 |
+
"step": 22072
|
| 77614 |
+
},
|
| 77615 |
+
{
|
| 77616 |
+
"epoch": 0.9814592503668134,
|
| 77617 |
+
"grad_norm": 0.057317234575748444,
|
| 77618 |
+
"learning_rate": 9.243144249591606e-07,
|
| 77619 |
+
"loss": 1.4467,
|
| 77620 |
+
"step": 22074
|
| 77621 |
+
},
|
| 77622 |
+
{
|
| 77623 |
+
"epoch": 0.9815481748254857,
|
| 77624 |
+
"grad_norm": 0.05799569934606552,
|
| 77625 |
+
"learning_rate": 9.154508573158693e-07,
|
| 77626 |
+
"loss": 1.4454,
|
| 77627 |
+
"step": 22076
|
| 77628 |
+
},
|
| 77629 |
+
{
|
| 77630 |
+
"epoch": 0.9816370992841581,
|
| 77631 |
+
"grad_norm": 0.05822969973087311,
|
| 77632 |
+
"learning_rate": 9.066299536192313e-07,
|
| 77633 |
+
"loss": 1.4448,
|
| 77634 |
+
"step": 22078
|
| 77635 |
+
},
|
| 77636 |
+
{
|
| 77637 |
+
"epoch": 0.9817260237428305,
|
| 77638 |
+
"grad_norm": 0.058205485343933105,
|
| 77639 |
+
"learning_rate": 8.9785171462331e-07,
|
| 77640 |
+
"loss": 1.455,
|
| 77641 |
+
"step": 22080
|
| 77642 |
+
},
|
| 77643 |
+
{
|
| 77644 |
+
"epoch": 0.9818149482015028,
|
| 77645 |
+
"grad_norm": 0.057623036205768585,
|
| 77646 |
+
"learning_rate": 8.891161410785053e-07,
|
| 77647 |
+
"loss": 1.4465,
|
| 77648 |
+
"step": 22082
|
| 77649 |
+
},
|
| 77650 |
+
{
|
| 77651 |
+
"epoch": 0.9819038726601752,
|
| 77652 |
+
"grad_norm": 0.056971605867147446,
|
| 77653 |
+
"learning_rate": 8.804232337315532e-07,
|
| 77654 |
+
"loss": 1.4439,
|
| 77655 |
+
"step": 22084
|
| 77656 |
+
},
|
| 77657 |
+
{
|
| 77658 |
+
"epoch": 0.9819927971188476,
|
| 77659 |
+
"grad_norm": 0.056623686105012894,
|
| 77660 |
+
"learning_rate": 8.717729933255814e-07,
|
| 77661 |
+
"loss": 1.4419,
|
| 77662 |
+
"step": 22086
|
| 77663 |
+
},
|
| 77664 |
+
{
|
| 77665 |
+
"epoch": 0.9820817215775199,
|
| 77666 |
+
"grad_norm": 0.05784169211983681,
|
| 77667 |
+
"learning_rate": 8.631654206000539e-07,
|
| 77668 |
+
"loss": 1.4461,
|
| 77669 |
+
"step": 22088
|
| 77670 |
+
},
|
| 77671 |
+
{
|
| 77672 |
+
"epoch": 0.9821706460361923,
|
| 77673 |
+
"grad_norm": 0.05700985714793205,
|
| 77674 |
+
"learning_rate": 8.546005162907156e-07,
|
| 77675 |
+
"loss": 1.4379,
|
| 77676 |
+
"step": 22090
|
| 77677 |
+
},
|
| 77678 |
+
{
|
| 77679 |
+
"epoch": 0.9822595704948646,
|
| 77680 |
+
"grad_norm": 0.057645298540592194,
|
| 77681 |
+
"learning_rate": 8.46078281129814e-07,
|
| 77682 |
+
"loss": 1.4479,
|
| 77683 |
+
"step": 22092
|
| 77684 |
+
},
|
| 77685 |
+
{
|
| 77686 |
+
"epoch": 0.9823484949535369,
|
| 77687 |
+
"grad_norm": 0.057107895612716675,
|
| 77688 |
+
"learning_rate": 8.375987158458775e-07,
|
| 77689 |
+
"loss": 1.448,
|
| 77690 |
+
"step": 22094
|
| 77691 |
+
},
|
| 77692 |
+
{
|
| 77693 |
+
"epoch": 0.9824374194122093,
|
| 77694 |
+
"grad_norm": 0.05772538483142853,
|
| 77695 |
+
"learning_rate": 8.291618211637153e-07,
|
| 77696 |
+
"loss": 1.4449,
|
| 77697 |
+
"step": 22096
|
| 77698 |
+
},
|
| 77699 |
+
{
|
| 77700 |
+
"epoch": 0.9825263438708817,
|
| 77701 |
+
"grad_norm": 0.05685894191265106,
|
| 77702 |
+
"learning_rate": 8.207675978045281e-07,
|
| 77703 |
+
"loss": 1.4485,
|
| 77704 |
+
"step": 22098
|
| 77705 |
+
},
|
| 77706 |
+
{
|
| 77707 |
+
"epoch": 0.982615268329554,
|
| 77708 |
+
"grad_norm": 0.058153510093688965,
|
| 77709 |
+
"learning_rate": 8.124160464859642e-07,
|
| 77710 |
+
"loss": 1.4477,
|
| 77711 |
+
"step": 22100
|
| 77712 |
+
},
|
| 77713 |
+
{
|
| 77714 |
+
"epoch": 0.9827041927882264,
|
| 77715 |
+
"grad_norm": 0.057993143796920776,
|
| 77716 |
+
"learning_rate": 8.041071679219525e-07,
|
| 77717 |
+
"loss": 1.4428,
|
| 77718 |
+
"step": 22102
|
| 77719 |
+
},
|
| 77720 |
+
{
|
| 77721 |
+
"epoch": 0.9827931172468988,
|
| 77722 |
+
"grad_norm": 0.0569118969142437,
|
| 77723 |
+
"learning_rate": 7.95840962822758e-07,
|
| 77724 |
+
"loss": 1.4464,
|
| 77725 |
+
"step": 22104
|
| 77726 |
+
},
|
| 77727 |
+
{
|
| 77728 |
+
"epoch": 0.9828820417055711,
|
| 77729 |
+
"grad_norm": 0.057352546602487564,
|
| 77730 |
+
"learning_rate": 7.876174318949824e-07,
|
| 77731 |
+
"loss": 1.4476,
|
| 77732 |
+
"step": 22106
|
| 77733 |
+
},
|
| 77734 |
+
{
|
| 77735 |
+
"epoch": 0.9829709661642435,
|
| 77736 |
+
"grad_norm": 0.05711887776851654,
|
| 77737 |
+
"learning_rate": 7.794365758416188e-07,
|
| 77738 |
+
"loss": 1.443,
|
| 77739 |
+
"step": 22108
|
| 77740 |
+
},
|
| 77741 |
+
{
|
| 77742 |
+
"epoch": 0.9830598906229159,
|
| 77743 |
+
"grad_norm": 0.058069389313459396,
|
| 77744 |
+
"learning_rate": 7.712983953619967e-07,
|
| 77745 |
+
"loss": 1.4485,
|
| 77746 |
+
"step": 22110
|
| 77747 |
+
},
|
| 77748 |
+
{
|
| 77749 |
+
"epoch": 0.9831488150815882,
|
| 77750 |
+
"grad_norm": 0.05813687667250633,
|
| 77751 |
+
"learning_rate": 7.632028911518374e-07,
|
| 77752 |
+
"loss": 1.4526,
|
| 77753 |
+
"step": 22112
|
| 77754 |
+
},
|
| 77755 |
+
{
|
| 77756 |
+
"epoch": 0.9832377395402605,
|
| 77757 |
+
"grad_norm": 0.05850476771593094,
|
| 77758 |
+
"learning_rate": 7.551500639031427e-07,
|
| 77759 |
+
"loss": 1.4463,
|
| 77760 |
+
"step": 22114
|
| 77761 |
+
},
|
| 77762 |
+
{
|
| 77763 |
+
"epoch": 0.9833266639989329,
|
| 77764 |
+
"grad_norm": 0.0575152225792408,
|
| 77765 |
+
"learning_rate": 7.471399143043067e-07,
|
| 77766 |
+
"loss": 1.4549,
|
| 77767 |
+
"step": 22116
|
| 77768 |
+
},
|
| 77769 |
+
{
|
| 77770 |
+
"epoch": 0.9834155884576052,
|
| 77771 |
+
"grad_norm": 0.05797769874334335,
|
| 77772 |
+
"learning_rate": 7.391724430401148e-07,
|
| 77773 |
+
"loss": 1.449,
|
| 77774 |
+
"step": 22118
|
| 77775 |
+
},
|
| 77776 |
+
{
|
| 77777 |
+
"epoch": 0.9835045129162776,
|
| 77778 |
+
"grad_norm": 0.056744713336229324,
|
| 77779 |
+
"learning_rate": 7.312476507916332e-07,
|
| 77780 |
+
"loss": 1.4456,
|
| 77781 |
+
"step": 22120
|
| 77782 |
+
},
|
| 77783 |
+
{
|
| 77784 |
+
"epoch": 0.98359343737495,
|
| 77785 |
+
"grad_norm": 0.057250816375017166,
|
| 77786 |
+
"learning_rate": 7.233655382363202e-07,
|
| 77787 |
+
"loss": 1.4396,
|
| 77788 |
+
"step": 22122
|
| 77789 |
+
},
|
| 77790 |
+
{
|
| 77791 |
+
"epoch": 0.9836823618336223,
|
| 77792 |
+
"grad_norm": 0.05758194625377655,
|
| 77793 |
+
"learning_rate": 7.155261060479701e-07,
|
| 77794 |
+
"loss": 1.4521,
|
| 77795 |
+
"step": 22124
|
| 77796 |
+
},
|
| 77797 |
+
{
|
| 77798 |
+
"epoch": 0.9837712862922947,
|
| 77799 |
+
"grad_norm": 0.05834776535630226,
|
| 77800 |
+
"learning_rate": 7.077293548966579e-07,
|
| 77801 |
+
"loss": 1.4438,
|
| 77802 |
+
"step": 22126
|
| 77803 |
+
},
|
| 77804 |
+
{
|
| 77805 |
+
"epoch": 0.9838602107509671,
|
| 77806 |
+
"grad_norm": 0.0584811232984066,
|
| 77807 |
+
"learning_rate": 6.999752854490171e-07,
|
| 77808 |
+
"loss": 1.4453,
|
| 77809 |
+
"step": 22128
|
| 77810 |
+
},
|
| 77811 |
+
{
|
| 77812 |
+
"epoch": 0.9839491352096394,
|
| 77813 |
+
"grad_norm": 0.05721158906817436,
|
| 77814 |
+
"learning_rate": 6.922638983677954e-07,
|
| 77815 |
+
"loss": 1.4502,
|
| 77816 |
+
"step": 22130
|
| 77817 |
+
},
|
| 77818 |
+
{
|
| 77819 |
+
"epoch": 0.9840380596683118,
|
| 77820 |
+
"grad_norm": 0.05716519057750702,
|
| 77821 |
+
"learning_rate": 6.84595194312243e-07,
|
| 77822 |
+
"loss": 1.4425,
|
| 77823 |
+
"step": 22132
|
| 77824 |
+
},
|
| 77825 |
+
{
|
| 77826 |
+
"epoch": 0.9841269841269841,
|
| 77827 |
+
"grad_norm": 0.05842083320021629,
|
| 77828 |
+
"learning_rate": 6.769691739378913e-07,
|
| 77829 |
+
"loss": 1.4459,
|
| 77830 |
+
"step": 22134
|
| 77831 |
+
},
|
| 77832 |
+
{
|
| 77833 |
+
"epoch": 0.9842159085856564,
|
| 77834 |
+
"grad_norm": 0.05762701854109764,
|
| 77835 |
+
"learning_rate": 6.693858378967188e-07,
|
| 77836 |
+
"loss": 1.4475,
|
| 77837 |
+
"step": 22136
|
| 77838 |
+
},
|
| 77839 |
+
{
|
| 77840 |
+
"epoch": 0.9843048330443288,
|
| 77841 |
+
"grad_norm": 0.0569646880030632,
|
| 77842 |
+
"learning_rate": 6.618451868368736e-07,
|
| 77843 |
+
"loss": 1.4453,
|
| 77844 |
+
"step": 22138
|
| 77845 |
+
},
|
| 77846 |
+
{
|
| 77847 |
+
"epoch": 0.9843937575030012,
|
| 77848 |
+
"grad_norm": 0.056726690381765366,
|
| 77849 |
+
"learning_rate": 6.543472214030066e-07,
|
| 77850 |
+
"loss": 1.4471,
|
| 77851 |
+
"step": 22140
|
| 77852 |
+
},
|
| 77853 |
+
{
|
| 77854 |
+
"epoch": 0.9844826819616735,
|
| 77855 |
+
"grad_norm": 0.056243959814310074,
|
| 77856 |
+
"learning_rate": 6.468919422361052e-07,
|
| 77857 |
+
"loss": 1.4397,
|
| 77858 |
+
"step": 22142
|
| 77859 |
+
},
|
| 77860 |
+
{
|
| 77861 |
+
"epoch": 0.9845716064203459,
|
| 77862 |
+
"grad_norm": 0.058017753064632416,
|
| 77863 |
+
"learning_rate": 6.394793499734375e-07,
|
| 77864 |
+
"loss": 1.4452,
|
| 77865 |
+
"step": 22144
|
| 77866 |
+
},
|
| 77867 |
+
{
|
| 77868 |
+
"epoch": 0.9846605308790183,
|
| 77869 |
+
"grad_norm": 0.057117003947496414,
|
| 77870 |
+
"learning_rate": 6.321094452487186e-07,
|
| 77871 |
+
"loss": 1.4457,
|
| 77872 |
+
"step": 22146
|
| 77873 |
+
},
|
| 77874 |
+
{
|
| 77875 |
+
"epoch": 0.9847494553376906,
|
| 77876 |
+
"grad_norm": 0.059118859469890594,
|
| 77877 |
+
"learning_rate": 6.247822286918892e-07,
|
| 77878 |
+
"loss": 1.4479,
|
| 77879 |
+
"step": 22148
|
| 77880 |
+
},
|
| 77881 |
+
{
|
| 77882 |
+
"epoch": 0.984838379796363,
|
| 77883 |
+
"grad_norm": 0.05744896084070206,
|
| 77884 |
+
"learning_rate": 6.174977009293925e-07,
|
| 77885 |
+
"loss": 1.4502,
|
| 77886 |
+
"step": 22150
|
| 77887 |
+
},
|
| 77888 |
+
{
|
| 77889 |
+
"epoch": 0.9849273042550354,
|
| 77890 |
+
"grad_norm": 0.05781065300107002,
|
| 77891 |
+
"learning_rate": 6.102558625838417e-07,
|
| 77892 |
+
"loss": 1.4416,
|
| 77893 |
+
"step": 22152
|
| 77894 |
+
},
|
| 77895 |
+
{
|
| 77896 |
+
"epoch": 0.9850162287137078,
|
| 77897 |
+
"grad_norm": 0.057693760842084885,
|
| 77898 |
+
"learning_rate": 6.030567142744081e-07,
|
| 77899 |
+
"loss": 1.4452,
|
| 77900 |
+
"step": 22154
|
| 77901 |
+
},
|
| 77902 |
+
{
|
| 77903 |
+
"epoch": 0.98510515317238,
|
| 77904 |
+
"grad_norm": 0.05738704651594162,
|
| 77905 |
+
"learning_rate": 5.959002566164328e-07,
|
| 77906 |
+
"loss": 1.4488,
|
| 77907 |
+
"step": 22156
|
| 77908 |
+
},
|
| 77909 |
+
{
|
| 77910 |
+
"epoch": 0.9851940776310524,
|
| 77911 |
+
"grad_norm": 0.05740581080317497,
|
| 77912 |
+
"learning_rate": 5.887864902217044e-07,
|
| 77913 |
+
"loss": 1.4462,
|
| 77914 |
+
"step": 22158
|
| 77915 |
+
},
|
| 77916 |
+
{
|
| 77917 |
+
"epoch": 0.9852830020897247,
|
| 77918 |
+
"grad_norm": 0.057811297476291656,
|
| 77919 |
+
"learning_rate": 5.817154156983473e-07,
|
| 77920 |
+
"loss": 1.4463,
|
| 77921 |
+
"step": 22160
|
| 77922 |
+
},
|
| 77923 |
+
{
|
| 77924 |
+
"epoch": 0.9853719265483971,
|
| 77925 |
+
"grad_norm": 0.0571342296898365,
|
| 77926 |
+
"learning_rate": 5.746870336508225e-07,
|
| 77927 |
+
"loss": 1.4455,
|
| 77928 |
+
"step": 22162
|
| 77929 |
+
},
|
| 77930 |
+
{
|
| 77931 |
+
"epoch": 0.9854608510070695,
|
| 77932 |
+
"grad_norm": 0.05744681507349014,
|
| 77933 |
+
"learning_rate": 5.677013446799828e-07,
|
| 77934 |
+
"loss": 1.4499,
|
| 77935 |
+
"step": 22164
|
| 77936 |
+
},
|
| 77937 |
+
{
|
| 77938 |
+
"epoch": 0.9855497754657419,
|
| 77939 |
+
"grad_norm": 0.05767764896154404,
|
| 77940 |
+
"learning_rate": 5.60758349382906e-07,
|
| 77941 |
+
"loss": 1.4425,
|
| 77942 |
+
"step": 22166
|
| 77943 |
+
},
|
| 77944 |
+
{
|
| 77945 |
+
"epoch": 0.9856386999244142,
|
| 77946 |
+
"grad_norm": 0.05785730853676796,
|
| 77947 |
+
"learning_rate": 5.53858048353173e-07,
|
| 77948 |
+
"loss": 1.4449,
|
| 77949 |
+
"step": 22168
|
| 77950 |
+
},
|
| 77951 |
+
{
|
| 77952 |
+
"epoch": 0.9857276243830866,
|
| 77953 |
+
"grad_norm": 0.058226075023412704,
|
| 77954 |
+
"learning_rate": 5.470004421806452e-07,
|
| 77955 |
+
"loss": 1.4434,
|
| 77956 |
+
"step": 22170
|
| 77957 |
+
},
|
| 77958 |
+
{
|
| 77959 |
+
"epoch": 0.985816548841759,
|
| 77960 |
+
"grad_norm": 0.057991281151771545,
|
| 77961 |
+
"learning_rate": 5.401855314515758e-07,
|
| 77962 |
+
"loss": 1.445,
|
| 77963 |
+
"step": 22172
|
| 77964 |
+
},
|
| 77965 |
+
{
|
| 77966 |
+
"epoch": 0.9859054733004313,
|
| 77967 |
+
"grad_norm": 0.05784721300005913,
|
| 77968 |
+
"learning_rate": 5.334133167484434e-07,
|
| 77969 |
+
"loss": 1.448,
|
| 77970 |
+
"step": 22174
|
| 77971 |
+
},
|
| 77972 |
+
{
|
| 77973 |
+
"epoch": 0.9859943977591037,
|
| 77974 |
+
"grad_norm": 0.05827952176332474,
|
| 77975 |
+
"learning_rate": 5.266837986502294e-07,
|
| 77976 |
+
"loss": 1.4506,
|
| 77977 |
+
"step": 22176
|
| 77978 |
+
},
|
| 77979 |
+
{
|
| 77980 |
+
"epoch": 0.986083322217776,
|
| 77981 |
+
"grad_norm": 0.05748755857348442,
|
| 77982 |
+
"learning_rate": 5.199969777321955e-07,
|
| 77983 |
+
"loss": 1.4483,
|
| 77984 |
+
"step": 22178
|
| 77985 |
+
},
|
| 77986 |
+
{
|
| 77987 |
+
"epoch": 0.9861722466764483,
|
| 77988 |
+
"grad_norm": 0.057960059493780136,
|
| 77989 |
+
"learning_rate": 5.13352854565996e-07,
|
| 77990 |
+
"loss": 1.4474,
|
| 77991 |
+
"step": 22180
|
| 77992 |
+
},
|
| 77993 |
+
{
|
| 77994 |
+
"epoch": 0.9862611711351207,
|
| 77995 |
+
"grad_norm": 0.05742307007312775,
|
| 77996 |
+
"learning_rate": 5.067514297195098e-07,
|
| 77997 |
+
"loss": 1.4512,
|
| 77998 |
+
"step": 22182
|
| 77999 |
+
},
|
| 78000 |
+
{
|
| 78001 |
+
"epoch": 0.986350095593793,
|
| 78002 |
+
"grad_norm": 0.05780716612935066,
|
| 78003 |
+
"learning_rate": 5.001927037571186e-07,
|
| 78004 |
+
"loss": 1.4459,
|
| 78005 |
+
"step": 22184
|
| 78006 |
+
},
|
| 78007 |
+
{
|
| 78008 |
+
"epoch": 0.9864390200524654,
|
| 78009 |
+
"grad_norm": 0.0577070526778698,
|
| 78010 |
+
"learning_rate": 4.936766772394851e-07,
|
| 78011 |
+
"loss": 1.4409,
|
| 78012 |
+
"step": 22186
|
| 78013 |
+
},
|
| 78014 |
+
{
|
| 78015 |
+
"epoch": 0.9865279445111378,
|
| 78016 |
+
"grad_norm": 0.057526715099811554,
|
| 78017 |
+
"learning_rate": 4.872033507236084e-07,
|
| 78018 |
+
"loss": 1.4471,
|
| 78019 |
+
"step": 22188
|
| 78020 |
+
},
|
| 78021 |
+
{
|
| 78022 |
+
"epoch": 0.9866168689698102,
|
| 78023 |
+
"grad_norm": 0.05801134556531906,
|
| 78024 |
+
"learning_rate": 4.80772724762879e-07,
|
| 78025 |
+
"loss": 1.4386,
|
| 78026 |
+
"step": 22190
|
| 78027 |
+
},
|
| 78028 |
+
{
|
| 78029 |
+
"epoch": 0.9867057934284825,
|
| 78030 |
+
"grad_norm": 0.05754433944821358,
|
| 78031 |
+
"learning_rate": 4.743847999070239e-07,
|
| 78032 |
+
"loss": 1.4458,
|
| 78033 |
+
"step": 22192
|
| 78034 |
+
},
|
| 78035 |
+
{
|
| 78036 |
+
"epoch": 0.9867947178871549,
|
| 78037 |
+
"grad_norm": 0.05766000971198082,
|
| 78038 |
+
"learning_rate": 4.680395767021062e-07,
|
| 78039 |
+
"loss": 1.4399,
|
| 78040 |
+
"step": 22194
|
| 78041 |
+
},
|
| 78042 |
+
{
|
| 78043 |
+
"epoch": 0.9868836423458273,
|
| 78044 |
+
"grad_norm": 0.0569644533097744,
|
| 78045 |
+
"learning_rate": 4.617370556904699e-07,
|
| 78046 |
+
"loss": 1.4417,
|
| 78047 |
+
"step": 22196
|
| 78048 |
+
},
|
| 78049 |
+
{
|
| 78050 |
+
"epoch": 0.9869725668044996,
|
| 78051 |
+
"grad_norm": 0.05827400088310242,
|
| 78052 |
+
"learning_rate": 4.554772374110172e-07,
|
| 78053 |
+
"loss": 1.4557,
|
| 78054 |
+
"step": 22198
|
| 78055 |
+
},
|
| 78056 |
+
{
|
| 78057 |
+
"epoch": 0.9870614912631719,
|
| 78058 |
+
"grad_norm": 0.05730379745364189,
|
| 78059 |
+
"learning_rate": 4.4926012239870916e-07,
|
| 78060 |
+
"loss": 1.4435,
|
| 78061 |
+
"step": 22200
|
| 78062 |
+
},
|
| 78063 |
+
{
|
| 78064 |
+
"epoch": 0.9871504157218443,
|
| 78065 |
+
"grad_norm": 0.05722169205546379,
|
| 78066 |
+
"learning_rate": 4.4308571118517605e-07,
|
| 78067 |
+
"loss": 1.4427,
|
| 78068 |
+
"step": 22202
|
| 78069 |
+
},
|
| 78070 |
+
{
|
| 78071 |
+
"epoch": 0.9872393401805166,
|
| 78072 |
+
"grad_norm": 0.05816062167286873,
|
| 78073 |
+
"learning_rate": 4.369540042981068e-07,
|
| 78074 |
+
"loss": 1.4448,
|
| 78075 |
+
"step": 22204
|
| 78076 |
+
},
|
| 78077 |
+
{
|
| 78078 |
+
"epoch": 0.987328264639189,
|
| 78079 |
+
"grad_norm": 0.05683821812272072,
|
| 78080 |
+
"learning_rate": 4.3086500226169334e-07,
|
| 78081 |
+
"loss": 1.445,
|
| 78082 |
+
"step": 22206
|
| 78083 |
+
},
|
| 78084 |
+
{
|
| 78085 |
+
"epoch": 0.9874171890978614,
|
| 78086 |
+
"grad_norm": 0.05709170922636986,
|
| 78087 |
+
"learning_rate": 4.248187055965191e-07,
|
| 78088 |
+
"loss": 1.4453,
|
| 78089 |
+
"step": 22208
|
| 78090 |
+
},
|
| 78091 |
+
{
|
| 78092 |
+
"epoch": 0.9875061135565337,
|
| 78093 |
+
"grad_norm": 0.05717954784631729,
|
| 78094 |
+
"learning_rate": 4.1881511481939304e-07,
|
| 78095 |
+
"loss": 1.4421,
|
| 78096 |
+
"step": 22210
|
| 78097 |
+
},
|
| 78098 |
+
{
|
| 78099 |
+
"epoch": 0.9875950380152061,
|
| 78100 |
+
"grad_norm": 0.05784517154097557,
|
| 78101 |
+
"learning_rate": 4.1285423044351565e-07,
|
| 78102 |
+
"loss": 1.4496,
|
| 78103 |
+
"step": 22212
|
| 78104 |
+
},
|
| 78105 |
+
{
|
| 78106 |
+
"epoch": 0.9876839624738785,
|
| 78107 |
+
"grad_norm": 0.057573914527893066,
|
| 78108 |
+
"learning_rate": 4.0693605297842384e-07,
|
| 78109 |
+
"loss": 1.4458,
|
| 78110 |
+
"step": 22214
|
| 78111 |
+
},
|
| 78112 |
+
{
|
| 78113 |
+
"epoch": 0.9877728869325508,
|
| 78114 |
+
"grad_norm": 0.056905996054410934,
|
| 78115 |
+
"learning_rate": 4.0106058293015723e-07,
|
| 78116 |
+
"loss": 1.4495,
|
| 78117 |
+
"step": 22216
|
| 78118 |
+
},
|
| 78119 |
+
{
|
| 78120 |
+
"epoch": 0.9878618113912232,
|
| 78121 |
+
"grad_norm": 0.05849708244204521,
|
| 78122 |
+
"learning_rate": 3.952278208008697e-07,
|
| 78123 |
+
"loss": 1.4523,
|
| 78124 |
+
"step": 22218
|
| 78125 |
+
},
|
| 78126 |
+
{
|
| 78127 |
+
"epoch": 0.9879507358498956,
|
| 78128 |
+
"grad_norm": 0.0573749914765358,
|
| 78129 |
+
"learning_rate": 3.8943776708916247e-07,
|
| 78130 |
+
"loss": 1.4409,
|
| 78131 |
+
"step": 22220
|
| 78132 |
+
},
|
| 78133 |
+
{
|
| 78134 |
+
"epoch": 0.9880396603085678,
|
| 78135 |
+
"grad_norm": 0.05719359219074249,
|
| 78136 |
+
"learning_rate": 3.836904222900284e-07,
|
| 78137 |
+
"loss": 1.4437,
|
| 78138 |
+
"step": 22222
|
| 78139 |
+
},
|
| 78140 |
+
{
|
| 78141 |
+
"epoch": 0.9881285847672402,
|
| 78142 |
+
"grad_norm": 0.058678288012742996,
|
| 78143 |
+
"learning_rate": 3.779857868947967e-07,
|
| 78144 |
+
"loss": 1.4416,
|
| 78145 |
+
"step": 22224
|
| 78146 |
+
},
|
| 78147 |
+
{
|
| 78148 |
+
"epoch": 0.9882175092259126,
|
| 78149 |
+
"grad_norm": 0.058080919086933136,
|
| 78150 |
+
"learning_rate": 3.723238613910773e-07,
|
| 78151 |
+
"loss": 1.4414,
|
| 78152 |
+
"step": 22226
|
| 78153 |
+
},
|
| 78154 |
+
{
|
| 78155 |
+
"epoch": 0.9883064336845849,
|
| 78156 |
+
"grad_norm": 0.05763470381498337,
|
| 78157 |
+
"learning_rate": 3.6670464626292754e-07,
|
| 78158 |
+
"loss": 1.4479,
|
| 78159 |
+
"step": 22228
|
| 78160 |
+
},
|
| 78161 |
+
{
|
| 78162 |
+
"epoch": 0.9883953581432573,
|
| 78163 |
+
"grad_norm": 0.0569768100976944,
|
| 78164 |
+
"learning_rate": 3.611281419906853e-07,
|
| 78165 |
+
"loss": 1.4533,
|
| 78166 |
+
"step": 22230
|
| 78167 |
+
},
|
| 78168 |
+
{
|
| 78169 |
+
"epoch": 0.9884842826019297,
|
| 78170 |
+
"grad_norm": 0.05751827359199524,
|
| 78171 |
+
"learning_rate": 3.5559434905102497e-07,
|
| 78172 |
+
"loss": 1.4468,
|
| 78173 |
+
"step": 22232
|
| 78174 |
+
},
|
| 78175 |
+
{
|
| 78176 |
+
"epoch": 0.988573207060602,
|
| 78177 |
+
"grad_norm": 0.057300738990306854,
|
| 78178 |
+
"learning_rate": 3.501032679170124e-07,
|
| 78179 |
+
"loss": 1.4462,
|
| 78180 |
+
"step": 22234
|
| 78181 |
+
},
|
| 78182 |
+
{
|
| 78183 |
+
"epoch": 0.9886621315192744,
|
| 78184 |
+
"grad_norm": 0.057894255965948105,
|
| 78185 |
+
"learning_rate": 3.4465489905810555e-07,
|
| 78186 |
+
"loss": 1.444,
|
| 78187 |
+
"step": 22236
|
| 78188 |
+
},
|
| 78189 |
+
{
|
| 78190 |
+
"epoch": 0.9887510559779468,
|
| 78191 |
+
"grad_norm": 0.05746915191411972,
|
| 78192 |
+
"learning_rate": 3.392492429399874e-07,
|
| 78193 |
+
"loss": 1.4461,
|
| 78194 |
+
"step": 22238
|
| 78195 |
+
},
|
| 78196 |
+
{
|
| 78197 |
+
"epoch": 0.9888399804366191,
|
| 78198 |
+
"grad_norm": 0.05837760120630264,
|
| 78199 |
+
"learning_rate": 3.3388630002473273e-07,
|
| 78200 |
+
"loss": 1.4482,
|
| 78201 |
+
"step": 22240
|
| 78202 |
+
},
|
| 78203 |
+
{
|
| 78204 |
+
"epoch": 0.9889289048952915,
|
| 78205 |
+
"grad_norm": 0.05805863440036774,
|
| 78206 |
+
"learning_rate": 3.2856607077086374e-07,
|
| 78207 |
+
"loss": 1.4444,
|
| 78208 |
+
"step": 22242
|
| 78209 |
+
},
|
| 78210 |
+
{
|
| 78211 |
+
"epoch": 0.9890178293539638,
|
| 78212 |
+
"grad_norm": 0.056360069662332535,
|
| 78213 |
+
"learning_rate": 3.2328855563318326e-07,
|
| 78214 |
+
"loss": 1.4449,
|
| 78215 |
+
"step": 22244
|
| 78216 |
+
},
|
| 78217 |
+
{
|
| 78218 |
+
"epoch": 0.9891067538126361,
|
| 78219 |
+
"grad_norm": 0.05757332220673561,
|
| 78220 |
+
"learning_rate": 3.180537550627749e-07,
|
| 78221 |
+
"loss": 1.4413,
|
| 78222 |
+
"step": 22246
|
| 78223 |
+
},
|
| 78224 |
+
{
|
| 78225 |
+
"epoch": 0.9891956782713085,
|
| 78226 |
+
"grad_norm": 0.05767189711332321,
|
| 78227 |
+
"learning_rate": 3.1286166950711405e-07,
|
| 78228 |
+
"loss": 1.4476,
|
| 78229 |
+
"step": 22248
|
| 78230 |
+
},
|
| 78231 |
+
{
|
| 78232 |
+
"epoch": 0.9892846027299809,
|
| 78233 |
+
"grad_norm": 0.058686595410108566,
|
| 78234 |
+
"learning_rate": 3.0771229941012337e-07,
|
| 78235 |
+
"loss": 1.446,
|
| 78236 |
+
"step": 22250
|
| 78237 |
+
},
|
| 78238 |
+
{
|
| 78239 |
+
"epoch": 0.9893735271886532,
|
| 78240 |
+
"grad_norm": 0.05774107575416565,
|
| 78241 |
+
"learning_rate": 3.026056452119508e-07,
|
| 78242 |
+
"loss": 1.4448,
|
| 78243 |
+
"step": 22252
|
| 78244 |
+
},
|
| 78245 |
+
{
|
| 78246 |
+
"epoch": 0.9894624516473256,
|
| 78247 |
+
"grad_norm": 0.05755819380283356,
|
| 78248 |
+
"learning_rate": 2.975417073491915e-07,
|
| 78249 |
+
"loss": 1.4493,
|
| 78250 |
+
"step": 22254
|
| 78251 |
+
},
|
| 78252 |
+
{
|
| 78253 |
+
"epoch": 0.989551376105998,
|
| 78254 |
+
"grad_norm": 0.057851407676935196,
|
| 78255 |
+
"learning_rate": 2.9252048625461047e-07,
|
| 78256 |
+
"loss": 1.4421,
|
| 78257 |
+
"step": 22256
|
| 78258 |
+
},
|
| 78259 |
+
{
|
| 78260 |
+
"epoch": 0.9896403005646703,
|
| 78261 |
+
"grad_norm": 0.05904214829206467,
|
| 78262 |
+
"learning_rate": 2.8754198235758643e-07,
|
| 78263 |
+
"loss": 1.4491,
|
| 78264 |
+
"step": 22258
|
| 78265 |
+
},
|
| 78266 |
+
{
|
| 78267 |
+
"epoch": 0.9897292250233427,
|
| 78268 |
+
"grad_norm": 0.0583941750228405,
|
| 78269 |
+
"learning_rate": 2.826061960836124e-07,
|
| 78270 |
+
"loss": 1.4429,
|
| 78271 |
+
"step": 22260
|
| 78272 |
+
},
|
| 78273 |
+
{
|
| 78274 |
+
"epoch": 0.9898181494820151,
|
| 78275 |
+
"grad_norm": 0.05874110758304596,
|
| 78276 |
+
"learning_rate": 2.7771312785462853e-07,
|
| 78277 |
+
"loss": 1.4454,
|
| 78278 |
+
"step": 22262
|
| 78279 |
+
},
|
| 78280 |
+
{
|
| 78281 |
+
"epoch": 0.9899070739406873,
|
| 78282 |
+
"grad_norm": 0.05823364108800888,
|
| 78283 |
+
"learning_rate": 2.7286277808891145e-07,
|
| 78284 |
+
"loss": 1.45,
|
| 78285 |
+
"step": 22264
|
| 78286 |
+
},
|
| 78287 |
+
{
|
| 78288 |
+
"epoch": 0.9899959983993597,
|
| 78289 |
+
"grad_norm": 0.056944265961647034,
|
| 78290 |
+
"learning_rate": 2.6805514720112946e-07,
|
| 78291 |
+
"loss": 1.4472,
|
| 78292 |
+
"step": 22266
|
| 78293 |
+
},
|
| 78294 |
+
{
|
| 78295 |
+
"epoch": 0.9900849228580321,
|
| 78296 |
+
"grad_norm": 0.05789085850119591,
|
| 78297 |
+
"learning_rate": 2.632902356022315e-07,
|
| 78298 |
+
"loss": 1.4496,
|
| 78299 |
+
"step": 22268
|
| 78300 |
+
},
|
| 78301 |
+
{
|
| 78302 |
+
"epoch": 0.9901738473167044,
|
| 78303 |
+
"grad_norm": 0.058085713535547256,
|
| 78304 |
+
"learning_rate": 2.5856804369955854e-07,
|
| 78305 |
+
"loss": 1.4492,
|
| 78306 |
+
"step": 22270
|
| 78307 |
+
},
|
| 78308 |
+
{
|
| 78309 |
+
"epoch": 0.9902627717753768,
|
| 78310 |
+
"grad_norm": 0.05807514116168022,
|
| 78311 |
+
"learning_rate": 2.538885718967876e-07,
|
| 78312 |
+
"loss": 1.4439,
|
| 78313 |
+
"step": 22272
|
| 78314 |
+
},
|
| 78315 |
+
{
|
| 78316 |
+
"epoch": 0.9903516962340492,
|
| 78317 |
+
"grad_norm": 0.057060327380895615,
|
| 78318 |
+
"learning_rate": 2.49251820593932e-07,
|
| 78319 |
+
"loss": 1.4408,
|
| 78320 |
+
"step": 22274
|
| 78321 |
+
},
|
| 78322 |
+
{
|
| 78323 |
+
"epoch": 0.9904406206927215,
|
| 78324 |
+
"grad_norm": 0.057622525840997696,
|
| 78325 |
+
"learning_rate": 2.44657790187397e-07,
|
| 78326 |
+
"loss": 1.4477,
|
| 78327 |
+
"step": 22276
|
| 78328 |
+
},
|
| 78329 |
+
{
|
| 78330 |
+
"epoch": 0.9905295451513939,
|
| 78331 |
+
"grad_norm": 0.056390151381492615,
|
| 78332 |
+
"learning_rate": 2.401064810698128e-07,
|
| 78333 |
+
"loss": 1.4408,
|
| 78334 |
+
"step": 22278
|
| 78335 |
+
},
|
| 78336 |
+
{
|
| 78337 |
+
"epoch": 0.9906184696100663,
|
| 78338 |
+
"grad_norm": 0.05661400407552719,
|
| 78339 |
+
"learning_rate": 2.355978936303127e-07,
|
| 78340 |
+
"loss": 1.4452,
|
| 78341 |
+
"step": 22280
|
| 78342 |
+
},
|
| 78343 |
+
{
|
| 78344 |
+
"epoch": 0.9907073940687386,
|
| 78345 |
+
"grad_norm": 0.057079821825027466,
|
| 78346 |
+
"learning_rate": 2.3113202825425505e-07,
|
| 78347 |
+
"loss": 1.4395,
|
| 78348 |
+
"step": 22282
|
| 78349 |
+
},
|
| 78350 |
+
{
|
| 78351 |
+
"epoch": 0.990796318527411,
|
| 78352 |
+
"grad_norm": 0.05696665868163109,
|
| 78353 |
+
"learning_rate": 2.267088853235011e-07,
|
| 78354 |
+
"loss": 1.447,
|
| 78355 |
+
"step": 22284
|
| 78356 |
+
},
|
| 78357 |
+
{
|
| 78358 |
+
"epoch": 0.9908852429860833,
|
| 78359 |
+
"grad_norm": 0.056844066828489304,
|
| 78360 |
+
"learning_rate": 2.2232846521608175e-07,
|
| 78361 |
+
"loss": 1.4413,
|
| 78362 |
+
"step": 22286
|
| 78363 |
+
},
|
| 78364 |
+
{
|
| 78365 |
+
"epoch": 0.9909741674447556,
|
| 78366 |
+
"grad_norm": 0.05780512094497681,
|
| 78367 |
+
"learning_rate": 2.1799076830647525e-07,
|
| 78368 |
+
"loss": 1.4417,
|
| 78369 |
+
"step": 22288
|
| 78370 |
+
},
|
| 78371 |
+
{
|
| 78372 |
+
"epoch": 0.991063091903428,
|
| 78373 |
+
"grad_norm": 0.05735721439123154,
|
| 78374 |
+
"learning_rate": 2.1369579496549607e-07,
|
| 78375 |
+
"loss": 1.4478,
|
| 78376 |
+
"step": 22290
|
| 78377 |
+
},
|
| 78378 |
+
{
|
| 78379 |
+
"epoch": 0.9911520163621004,
|
| 78380 |
+
"grad_norm": 0.05718429014086723,
|
| 78381 |
+
"learning_rate": 2.0944354556023947e-07,
|
| 78382 |
+
"loss": 1.4469,
|
| 78383 |
+
"step": 22292
|
| 78384 |
+
},
|
| 78385 |
+
{
|
| 78386 |
+
"epoch": 0.9912409408207727,
|
| 78387 |
+
"grad_norm": 0.05797044187784195,
|
| 78388 |
+
"learning_rate": 2.0523402045430352e-07,
|
| 78389 |
+
"loss": 1.4446,
|
| 78390 |
+
"step": 22294
|
| 78391 |
+
},
|
| 78392 |
+
{
|
| 78393 |
+
"epoch": 0.9913298652794451,
|
| 78394 |
+
"grad_norm": 0.05726039782166481,
|
| 78395 |
+
"learning_rate": 2.0106722000745592e-07,
|
| 78396 |
+
"loss": 1.4418,
|
| 78397 |
+
"step": 22296
|
| 78398 |
+
},
|
| 78399 |
+
{
|
| 78400 |
+
"epoch": 0.9914187897381175,
|
| 78401 |
+
"grad_norm": 0.05672242492437363,
|
| 78402 |
+
"learning_rate": 1.969431445759673e-07,
|
| 78403 |
+
"loss": 1.4446,
|
| 78404 |
+
"step": 22298
|
| 78405 |
+
},
|
| 78406 |
+
{
|
| 78407 |
+
"epoch": 0.9915077141967898,
|
| 78408 |
+
"grad_norm": 0.057257115840911865,
|
| 78409 |
+
"learning_rate": 1.9286179451227792e-07,
|
| 78410 |
+
"loss": 1.448,
|
| 78411 |
+
"step": 22300
|
| 78412 |
+
},
|
| 78413 |
+
{
|
| 78414 |
+
"epoch": 0.9915966386554622,
|
| 78415 |
+
"grad_norm": 0.05845504254102707,
|
| 78416 |
+
"learning_rate": 1.8882317016538642e-07,
|
| 78417 |
+
"loss": 1.4445,
|
| 78418 |
+
"step": 22302
|
| 78419 |
+
},
|
| 78420 |
+
{
|
| 78421 |
+
"epoch": 0.9916855631141346,
|
| 78422 |
+
"grad_norm": 0.058910876512527466,
|
| 78423 |
+
"learning_rate": 1.8482727188046112e-07,
|
| 78424 |
+
"loss": 1.4505,
|
| 78425 |
+
"step": 22304
|
| 78426 |
+
},
|
| 78427 |
+
{
|
| 78428 |
+
"epoch": 0.991774487572807,
|
| 78429 |
+
"grad_norm": 0.05711057409644127,
|
| 78430 |
+
"learning_rate": 1.8087409999911763e-07,
|
| 78431 |
+
"loss": 1.4427,
|
| 78432 |
+
"step": 22306
|
| 78433 |
+
},
|
| 78434 |
+
{
|
| 78435 |
+
"epoch": 0.9918634120314792,
|
| 78436 |
+
"grad_norm": 0.05767577514052391,
|
| 78437 |
+
"learning_rate": 1.7696365485930787e-07,
|
| 78438 |
+
"loss": 1.4478,
|
| 78439 |
+
"step": 22308
|
| 78440 |
+
},
|
| 78441 |
+
{
|
| 78442 |
+
"epoch": 0.9919523364901516,
|
| 78443 |
+
"grad_norm": 0.057249389588832855,
|
| 78444 |
+
"learning_rate": 1.7309593679526447e-07,
|
| 78445 |
+
"loss": 1.4401,
|
| 78446 |
+
"step": 22310
|
| 78447 |
+
},
|
| 78448 |
+
{
|
| 78449 |
+
"epoch": 0.9920412609488239,
|
| 78450 |
+
"grad_norm": 0.05789843201637268,
|
| 78451 |
+
"learning_rate": 1.6927094613766736e-07,
|
| 78452 |
+
"loss": 1.4432,
|
| 78453 |
+
"step": 22312
|
| 78454 |
+
},
|
| 78455 |
+
{
|
| 78456 |
+
"epoch": 0.9921301854074963,
|
| 78457 |
+
"grad_norm": 0.05697624757885933,
|
| 78458 |
+
"learning_rate": 1.654886832134772e-07,
|
| 78459 |
+
"loss": 1.4426,
|
| 78460 |
+
"step": 22314
|
| 78461 |
+
},
|
| 78462 |
+
{
|
| 78463 |
+
"epoch": 0.9922191098661687,
|
| 78464 |
+
"grad_norm": 0.058805204927921295,
|
| 78465 |
+
"learning_rate": 1.6174914834599097e-07,
|
| 78466 |
+
"loss": 1.4463,
|
| 78467 |
+
"step": 22316
|
| 78468 |
+
},
|
| 78469 |
+
{
|
| 78470 |
+
"epoch": 0.992308034324841,
|
| 78471 |
+
"grad_norm": 0.05726942420005798,
|
| 78472 |
+
"learning_rate": 1.5805234185495287e-07,
|
| 78473 |
+
"loss": 1.4472,
|
| 78474 |
+
"step": 22318
|
| 78475 |
+
},
|
| 78476 |
+
{
|
| 78477 |
+
"epoch": 0.9923969587835134,
|
| 78478 |
+
"grad_norm": 0.05799218639731407,
|
| 78479 |
+
"learning_rate": 1.5439826405627688e-07,
|
| 78480 |
+
"loss": 1.4422,
|
| 78481 |
+
"step": 22320
|
| 78482 |
+
},
|
| 78483 |
+
{
|
| 78484 |
+
"epoch": 0.9924858832421858,
|
| 78485 |
+
"grad_norm": 0.05797122046351433,
|
| 78486 |
+
"learning_rate": 1.5078691526243526e-07,
|
| 78487 |
+
"loss": 1.4429,
|
| 78488 |
+
"step": 22322
|
| 78489 |
+
},
|
| 78490 |
+
{
|
| 78491 |
+
"epoch": 0.9925748077008582,
|
| 78492 |
+
"grad_norm": 0.057014547288417816,
|
| 78493 |
+
"learning_rate": 1.4721829578207002e-07,
|
| 78494 |
+
"loss": 1.4429,
|
| 78495 |
+
"step": 22324
|
| 78496 |
+
},
|
| 78497 |
+
{
|
| 78498 |
+
"epoch": 0.9926637321595305,
|
| 78499 |
+
"grad_norm": 0.05727580189704895,
|
| 78500 |
+
"learning_rate": 1.4369240592021493e-07,
|
| 78501 |
+
"loss": 1.4485,
|
| 78502 |
+
"step": 22326
|
| 78503 |
+
},
|
| 78504 |
+
{
|
| 78505 |
+
"epoch": 0.9927526566182029,
|
| 78506 |
+
"grad_norm": 0.05705159902572632,
|
| 78507 |
+
"learning_rate": 1.4020924597840656e-07,
|
| 78508 |
+
"loss": 1.4497,
|
| 78509 |
+
"step": 22328
|
| 78510 |
+
},
|
| 78511 |
+
{
|
| 78512 |
+
"epoch": 0.9928415810768751,
|
| 78513 |
+
"grad_norm": 0.05742578208446503,
|
| 78514 |
+
"learning_rate": 1.3676881625424021e-07,
|
| 78515 |
+
"loss": 1.4469,
|
| 78516 |
+
"step": 22330
|
| 78517 |
+
},
|
| 78518 |
+
{
|
| 78519 |
+
"epoch": 0.9929305055355475,
|
| 78520 |
+
"grad_norm": 0.058664653450250626,
|
| 78521 |
+
"learning_rate": 1.3337111704198047e-07,
|
| 78522 |
+
"loss": 1.4475,
|
| 78523 |
+
"step": 22332
|
| 78524 |
+
},
|
| 78525 |
+
{
|
| 78526 |
+
"epoch": 0.9930194299942199,
|
| 78527 |
+
"grad_norm": 0.05729004368185997,
|
| 78528 |
+
"learning_rate": 1.3001614863195066e-07,
|
| 78529 |
+
"loss": 1.4456,
|
| 78530 |
+
"step": 22334
|
| 78531 |
+
},
|
| 78532 |
+
{
|
| 78533 |
+
"epoch": 0.9931083544528923,
|
| 78534 |
+
"grad_norm": 0.05801570415496826,
|
| 78535 |
+
"learning_rate": 1.2670391131103242e-07,
|
| 78536 |
+
"loss": 1.4544,
|
| 78537 |
+
"step": 22336
|
| 78538 |
+
},
|
| 78539 |
+
{
|
| 78540 |
+
"epoch": 0.9931972789115646,
|
| 78541 |
+
"grad_norm": 0.05761876702308655,
|
| 78542 |
+
"learning_rate": 1.2343440536227714e-07,
|
| 78543 |
+
"loss": 1.4461,
|
| 78544 |
+
"step": 22338
|
| 78545 |
+
},
|
| 78546 |
+
{
|
| 78547 |
+
"epoch": 0.993286203370237,
|
| 78548 |
+
"grad_norm": 0.05685674026608467,
|
| 78549 |
+
"learning_rate": 1.2020763106529443e-07,
|
| 78550 |
+
"loss": 1.4403,
|
| 78551 |
+
"step": 22340
|
| 78552 |
+
},
|
| 78553 |
+
{
|
| 78554 |
+
"epoch": 0.9933751278289094,
|
| 78555 |
+
"grad_norm": 0.05733815208077431,
|
| 78556 |
+
"learning_rate": 1.170235886958082e-07,
|
| 78557 |
+
"loss": 1.4474,
|
| 78558 |
+
"step": 22342
|
| 78559 |
+
},
|
| 78560 |
+
{
|
| 78561 |
+
"epoch": 0.9934640522875817,
|
| 78562 |
+
"grad_norm": 0.057545486837625504,
|
| 78563 |
+
"learning_rate": 1.1388227852610066e-07,
|
| 78564 |
+
"loss": 1.445,
|
| 78565 |
+
"step": 22344
|
| 78566 |
+
},
|
| 78567 |
+
{
|
| 78568 |
+
"epoch": 0.9935529767462541,
|
| 78569 |
+
"grad_norm": 0.0574163943529129,
|
| 78570 |
+
"learning_rate": 1.1078370082467926e-07,
|
| 78571 |
+
"loss": 1.4422,
|
| 78572 |
+
"step": 22346
|
| 78573 |
+
},
|
| 78574 |
+
{
|
| 78575 |
+
"epoch": 0.9936419012049265,
|
| 78576 |
+
"grad_norm": 0.05813299119472504,
|
| 78577 |
+
"learning_rate": 1.0772785585633215e-07,
|
| 78578 |
+
"loss": 1.4507,
|
| 78579 |
+
"step": 22348
|
| 78580 |
+
},
|
| 78581 |
+
{
|
| 78582 |
+
"epoch": 0.9937308256635988,
|
| 78583 |
+
"grad_norm": 0.05734492093324661,
|
| 78584 |
+
"learning_rate": 1.0471474388240587e-07,
|
| 78585 |
+
"loss": 1.4422,
|
| 78586 |
+
"step": 22350
|
| 78587 |
+
},
|
| 78588 |
+
{
|
| 78589 |
+
"epoch": 0.9938197501222711,
|
| 78590 |
+
"grad_norm": 0.05803303048014641,
|
| 78591 |
+
"learning_rate": 1.0174436516047214e-07,
|
| 78592 |
+
"loss": 1.4482,
|
| 78593 |
+
"step": 22352
|
| 78594 |
+
},
|
| 78595 |
+
{
|
| 78596 |
+
"epoch": 0.9939086745809435,
|
| 78597 |
+
"grad_norm": 0.058048054575920105,
|
| 78598 |
+
"learning_rate": 9.881671994432795e-08,
|
| 78599 |
+
"loss": 1.4479,
|
| 78600 |
+
"step": 22354
|
| 78601 |
+
},
|
| 78602 |
+
{
|
| 78603 |
+
"epoch": 0.9939975990396158,
|
| 78604 |
+
"grad_norm": 0.057730190455913544,
|
| 78605 |
+
"learning_rate": 9.59318084843841e-08,
|
| 78606 |
+
"loss": 1.4538,
|
| 78607 |
+
"step": 22356
|
| 78608 |
+
},
|
| 78609 |
+
{
|
| 78610 |
+
"epoch": 0.9940865234982882,
|
| 78611 |
+
"grad_norm": 0.05775808170437813,
|
| 78612 |
+
"learning_rate": 9.308963102716561e-08,
|
| 78613 |
+
"loss": 1.442,
|
| 78614 |
+
"step": 22358
|
| 78615 |
+
},
|
| 78616 |
+
{
|
| 78617 |
+
"epoch": 0.9941754479569606,
|
| 78618 |
+
"grad_norm": 0.05732406675815582,
|
| 78619 |
+
"learning_rate": 9.029018781570031e-08,
|
| 78620 |
+
"loss": 1.447,
|
| 78621 |
+
"step": 22360
|
| 78622 |
+
},
|
| 78623 |
+
{
|
| 78624 |
+
"epoch": 0.9942643724156329,
|
| 78625 |
+
"grad_norm": 0.05907835438847542,
|
| 78626 |
+
"learning_rate": 8.753347908924126e-08,
|
| 78627 |
+
"loss": 1.4477,
|
| 78628 |
+
"step": 22362
|
| 78629 |
+
},
|
| 78630 |
+
{
|
| 78631 |
+
"epoch": 0.9943532968743053,
|
| 78632 |
+
"grad_norm": 0.05664451792836189,
|
| 78633 |
+
"learning_rate": 8.481950508343328e-08,
|
| 78634 |
+
"loss": 1.4409,
|
| 78635 |
+
"step": 22364
|
| 78636 |
+
},
|
| 78637 |
+
{
|
| 78638 |
+
"epoch": 0.9944422213329777,
|
| 78639 |
+
"grad_norm": 0.05807731673121452,
|
| 78640 |
+
"learning_rate": 8.214826603031301e-08,
|
| 78641 |
+
"loss": 1.4443,
|
| 78642 |
+
"step": 22366
|
| 78643 |
+
},
|
| 78644 |
+
{
|
| 78645 |
+
"epoch": 0.99453114579165,
|
| 78646 |
+
"grad_norm": 0.0581711083650589,
|
| 78647 |
+
"learning_rate": 7.951976215825329e-08,
|
| 78648 |
+
"loss": 1.4511,
|
| 78649 |
+
"step": 22368
|
| 78650 |
+
},
|
| 78651 |
+
{
|
| 78652 |
+
"epoch": 0.9946200702503224,
|
| 78653 |
+
"grad_norm": 0.057496510446071625,
|
| 78654 |
+
"learning_rate": 7.693399369190779e-08,
|
| 78655 |
+
"loss": 1.4406,
|
| 78656 |
+
"step": 22370
|
| 78657 |
+
},
|
| 78658 |
+
{
|
| 78659 |
+
"epoch": 0.9947089947089947,
|
| 78660 |
+
"grad_norm": 0.05667787045240402,
|
| 78661 |
+
"learning_rate": 7.43909608523774e-08,
|
| 78662 |
+
"loss": 1.4445,
|
| 78663 |
+
"step": 22372
|
| 78664 |
+
},
|
| 78665 |
+
{
|
| 78666 |
+
"epoch": 0.994797919167667,
|
| 78667 |
+
"grad_norm": 0.05935773253440857,
|
| 78668 |
+
"learning_rate": 7.189066385693276e-08,
|
| 78669 |
+
"loss": 1.4474,
|
| 78670 |
+
"step": 22374
|
| 78671 |
+
},
|
| 78672 |
+
{
|
| 78673 |
+
"epoch": 0.9948868436263394,
|
| 78674 |
+
"grad_norm": 0.05779668688774109,
|
| 78675 |
+
"learning_rate": 6.943310291945836e-08,
|
| 78676 |
+
"loss": 1.4459,
|
| 78677 |
+
"step": 22376
|
| 78678 |
+
},
|
| 78679 |
+
{
|
| 78680 |
+
"epoch": 0.9949757680850118,
|
| 78681 |
+
"grad_norm": 0.056512489914894104,
|
| 78682 |
+
"learning_rate": 6.701827824989737e-08,
|
| 78683 |
+
"loss": 1.4443,
|
| 78684 |
+
"step": 22378
|
| 78685 |
+
},
|
| 78686 |
+
{
|
| 78687 |
+
"epoch": 0.9950646925436841,
|
| 78688 |
+
"grad_norm": 0.058668848127126694,
|
| 78689 |
+
"learning_rate": 6.464619005480676e-08,
|
| 78690 |
+
"loss": 1.4499,
|
| 78691 |
+
"step": 22380
|
| 78692 |
+
},
|
| 78693 |
+
{
|
| 78694 |
+
"epoch": 0.9951536170023565,
|
| 78695 |
+
"grad_norm": 0.05988642945885658,
|
| 78696 |
+
"learning_rate": 6.231683853685776e-08,
|
| 78697 |
+
"loss": 1.4459,
|
| 78698 |
+
"step": 22382
|
| 78699 |
+
},
|
| 78700 |
+
{
|
| 78701 |
+
"epoch": 0.9952425414610289,
|
| 78702 |
+
"grad_norm": 0.057425398379564285,
|
| 78703 |
+
"learning_rate": 6.003022389522439e-08,
|
| 78704 |
+
"loss": 1.4456,
|
| 78705 |
+
"step": 22384
|
| 78706 |
+
},
|
| 78707 |
+
{
|
| 78708 |
+
"epoch": 0.9953314659197012,
|
| 78709 |
+
"grad_norm": 0.05751774087548256,
|
| 78710 |
+
"learning_rate": 5.778634632536139e-08,
|
| 78711 |
+
"loss": 1.4431,
|
| 78712 |
+
"step": 22386
|
| 78713 |
+
},
|
| 78714 |
+
{
|
| 78715 |
+
"epoch": 0.9954203903783736,
|
| 78716 |
+
"grad_norm": 0.05865611135959625,
|
| 78717 |
+
"learning_rate": 5.558520601917083e-08,
|
| 78718 |
+
"loss": 1.4444,
|
| 78719 |
+
"step": 22388
|
| 78720 |
+
},
|
| 78721 |
+
{
|
| 78722 |
+
"epoch": 0.995509314837046,
|
| 78723 |
+
"grad_norm": 0.05733845382928848,
|
| 78724 |
+
"learning_rate": 5.342680316466897e-08,
|
| 78725 |
+
"loss": 1.4468,
|
| 78726 |
+
"step": 22390
|
| 78727 |
+
},
|
| 78728 |
+
{
|
| 78729 |
+
"epoch": 0.9955982392957183,
|
| 78730 |
+
"grad_norm": 0.059069231152534485,
|
| 78731 |
+
"learning_rate": 5.131113794643039e-08,
|
| 78732 |
+
"loss": 1.4488,
|
| 78733 |
+
"step": 22392
|
| 78734 |
+
},
|
| 78735 |
+
{
|
| 78736 |
+
"epoch": 0.9956871637543906,
|
| 78737 |
+
"grad_norm": 0.05756256729364395,
|
| 78738 |
+
"learning_rate": 4.923821054536592e-08,
|
| 78739 |
+
"loss": 1.4397,
|
| 78740 |
+
"step": 22394
|
| 78741 |
+
},
|
| 78742 |
+
{
|
| 78743 |
+
"epoch": 0.995776088213063,
|
| 78744 |
+
"grad_norm": 0.057630039751529694,
|
| 78745 |
+
"learning_rate": 4.7208021138611666e-08,
|
| 78746 |
+
"loss": 1.4437,
|
| 78747 |
+
"step": 22396
|
| 78748 |
+
},
|
| 78749 |
+
{
|
| 78750 |
+
"epoch": 0.9958650126717353,
|
| 78751 |
+
"grad_norm": 0.057779572904109955,
|
| 78752 |
+
"learning_rate": 4.5220569899750984e-08,
|
| 78753 |
+
"loss": 1.4464,
|
| 78754 |
+
"step": 22398
|
| 78755 |
+
},
|
| 78756 |
+
{
|
| 78757 |
+
"epoch": 0.9959539371304077,
|
| 78758 |
+
"grad_norm": 0.05961634963750839,
|
| 78759 |
+
"learning_rate": 4.327585699859249e-08,
|
| 78760 |
+
"loss": 1.4437,
|
| 78761 |
+
"step": 22400
|
| 78762 |
+
},
|
| 78763 |
+
{
|
| 78764 |
+
"epoch": 0.9960428615890801,
|
| 78765 |
+
"grad_norm": 0.05674600601196289,
|
| 78766 |
+
"learning_rate": 4.137388260155861e-08,
|
| 78767 |
+
"loss": 1.4464,
|
| 78768 |
+
"step": 22402
|
| 78769 |
+
},
|
| 78770 |
+
{
|
| 78771 |
+
"epoch": 0.9961317860477524,
|
| 78772 |
+
"grad_norm": 0.056886181235313416,
|
| 78773 |
+
"learning_rate": 3.951464687107498e-08,
|
| 78774 |
+
"loss": 1.445,
|
| 78775 |
+
"step": 22404
|
| 78776 |
+
},
|
| 78777 |
+
{
|
| 78778 |
+
"epoch": 0.9962207105064248,
|
| 78779 |
+
"grad_norm": 0.0578632578253746,
|
| 78780 |
+
"learning_rate": 3.769814996612553e-08,
|
| 78781 |
+
"loss": 1.4484,
|
| 78782 |
+
"step": 22406
|
| 78783 |
+
},
|
| 78784 |
+
{
|
| 78785 |
+
"epoch": 0.9963096349650972,
|
| 78786 |
+
"grad_norm": 0.057221461087465286,
|
| 78787 |
+
"learning_rate": 3.592439204197495e-08,
|
| 78788 |
+
"loss": 1.445,
|
| 78789 |
+
"step": 22408
|
| 78790 |
+
},
|
| 78791 |
+
{
|
| 78792 |
+
"epoch": 0.9963985594237695,
|
| 78793 |
+
"grad_norm": 0.058185044676065445,
|
| 78794 |
+
"learning_rate": 3.4193373250335224e-08,
|
| 78795 |
+
"loss": 1.4412,
|
| 78796 |
+
"step": 22410
|
| 78797 |
+
},
|
| 78798 |
+
{
|
| 78799 |
+
"epoch": 0.9964874838824419,
|
| 78800 |
+
"grad_norm": 0.05843387171626091,
|
| 78801 |
+
"learning_rate": 3.250509373908805e-08,
|
| 78802 |
+
"loss": 1.4423,
|
| 78803 |
+
"step": 22412
|
| 78804 |
+
},
|
| 78805 |
+
{
|
| 78806 |
+
"epoch": 0.9965764083411143,
|
| 78807 |
+
"grad_norm": 0.05737845599651337,
|
| 78808 |
+
"learning_rate": 3.085955365261794e-08,
|
| 78809 |
+
"loss": 1.4465,
|
| 78810 |
+
"step": 22414
|
| 78811 |
+
},
|
| 78812 |
+
{
|
| 78813 |
+
"epoch": 0.9966653327997865,
|
| 78814 |
+
"grad_norm": 0.05840914696455002,
|
| 78815 |
+
"learning_rate": 2.925675313159015e-08,
|
| 78816 |
+
"loss": 1.4438,
|
| 78817 |
+
"step": 22416
|
| 78818 |
+
},
|
| 78819 |
+
{
|
| 78820 |
+
"epoch": 0.9967542572584589,
|
| 78821 |
+
"grad_norm": 0.05688779801130295,
|
| 78822 |
+
"learning_rate": 2.769669231295069e-08,
|
| 78823 |
+
"loss": 1.4438,
|
| 78824 |
+
"step": 22418
|
| 78825 |
+
},
|
| 78826 |
+
{
|
| 78827 |
+
"epoch": 0.9968431817171313,
|
| 78828 |
+
"grad_norm": 0.05774452164769173,
|
| 78829 |
+
"learning_rate": 2.617937133009285e-08,
|
| 78830 |
+
"loss": 1.4466,
|
| 78831 |
+
"step": 22420
|
| 78832 |
+
},
|
| 78833 |
+
{
|
| 78834 |
+
"epoch": 0.9969321061758036,
|
| 78835 |
+
"grad_norm": 0.05797778069972992,
|
| 78836 |
+
"learning_rate": 2.4704790312746194e-08,
|
| 78837 |
+
"loss": 1.4443,
|
| 78838 |
+
"step": 22422
|
| 78839 |
+
},
|
| 78840 |
+
{
|
| 78841 |
+
"epoch": 0.997021030634476,
|
| 78842 |
+
"grad_norm": 0.057229943573474884,
|
| 78843 |
+
"learning_rate": 2.327294938697655e-08,
|
| 78844 |
+
"loss": 1.4496,
|
| 78845 |
+
"step": 22424
|
| 78846 |
+
},
|
| 78847 |
+
{
|
| 78848 |
+
"epoch": 0.9971099550931484,
|
| 78849 |
+
"grad_norm": 0.05771101638674736,
|
| 78850 |
+
"learning_rate": 2.188384867513049e-08,
|
| 78851 |
+
"loss": 1.4412,
|
| 78852 |
+
"step": 22426
|
| 78853 |
+
},
|
| 78854 |
+
{
|
| 78855 |
+
"epoch": 0.9971988795518207,
|
| 78856 |
+
"grad_norm": 0.057632721960544586,
|
| 78857 |
+
"learning_rate": 2.053748829600188e-08,
|
| 78858 |
+
"loss": 1.4515,
|
| 78859 |
+
"step": 22428
|
| 78860 |
+
},
|
| 78861 |
+
{
|
| 78862 |
+
"epoch": 0.9972878040104931,
|
| 78863 |
+
"grad_norm": 0.05861422419548035,
|
| 78864 |
+
"learning_rate": 1.9233868364665342e-08,
|
| 78865 |
+
"loss": 1.4463,
|
| 78866 |
+
"step": 22430
|
| 78867 |
+
},
|
| 78868 |
+
{
|
| 78869 |
+
"epoch": 0.9973767284691655,
|
| 78870 |
+
"grad_norm": 0.05753085017204285,
|
| 78871 |
+
"learning_rate": 1.797298899258726e-08,
|
| 78872 |
+
"loss": 1.4453,
|
| 78873 |
+
"step": 22432
|
| 78874 |
+
},
|
| 78875 |
+
{
|
| 78876 |
+
"epoch": 0.9974656529278378,
|
| 78877 |
+
"grad_norm": 0.057691846042871475,
|
| 78878 |
+
"learning_rate": 1.6754850287459267e-08,
|
| 78879 |
+
"loss": 1.4435,
|
| 78880 |
+
"step": 22434
|
| 78881 |
+
},
|
| 78882 |
+
{
|
| 78883 |
+
"epoch": 0.9975545773865102,
|
| 78884 |
+
"grad_norm": 0.05841954052448273,
|
| 78885 |
+
"learning_rate": 1.5579452353531308e-08,
|
| 78886 |
+
"loss": 1.4526,
|
| 78887 |
+
"step": 22436
|
| 78888 |
+
},
|
| 78889 |
+
{
|
| 78890 |
+
"epoch": 0.9976435018451825,
|
| 78891 |
+
"grad_norm": 0.05724509432911873,
|
| 78892 |
+
"learning_rate": 1.4446795291223059e-08,
|
| 78893 |
+
"loss": 1.4428,
|
| 78894 |
+
"step": 22438
|
| 78895 |
+
},
|
| 78896 |
+
{
|
| 78897 |
+
"epoch": 0.9977324263038548,
|
| 78898 |
+
"grad_norm": 0.05789647623896599,
|
| 78899 |
+
"learning_rate": 1.3356879197401473e-08,
|
| 78900 |
+
"loss": 1.445,
|
| 78901 |
+
"step": 22440
|
| 78902 |
+
},
|
| 78903 |
+
{
|
| 78904 |
+
"epoch": 0.9978213507625272,
|
| 78905 |
+
"grad_norm": 0.058276426047086716,
|
| 78906 |
+
"learning_rate": 1.2309704165158753e-08,
|
| 78907 |
+
"loss": 1.4454,
|
| 78908 |
+
"step": 22442
|
| 78909 |
+
},
|
| 78910 |
+
{
|
| 78911 |
+
"epoch": 0.9979102752211996,
|
| 78912 |
+
"grad_norm": 0.057980820536613464,
|
| 78913 |
+
"learning_rate": 1.1305270284089897e-08,
|
| 78914 |
+
"loss": 1.4448,
|
| 78915 |
+
"step": 22444
|
| 78916 |
+
},
|
| 78917 |
+
{
|
| 78918 |
+
"epoch": 0.9979991996798719,
|
| 78919 |
+
"grad_norm": 0.05673612281680107,
|
| 78920 |
+
"learning_rate": 1.0343577639959634e-08,
|
| 78921 |
+
"loss": 1.446,
|
| 78922 |
+
"step": 22446
|
| 78923 |
+
},
|
| 78924 |
+
{
|
| 78925 |
+
"epoch": 0.9980881241385443,
|
| 78926 |
+
"grad_norm": 0.05783254653215408,
|
| 78927 |
+
"learning_rate": 9.424626315091e-09,
|
| 78928 |
+
"loss": 1.4437,
|
| 78929 |
+
"step": 22448
|
| 78930 |
+
},
|
| 78931 |
+
{
|
| 78932 |
+
"epoch": 0.9981770485972167,
|
| 78933 |
+
"grad_norm": 0.058315496891736984,
|
| 78934 |
+
"learning_rate": 8.548416388032277e-09,
|
| 78935 |
+
"loss": 1.4446,
|
| 78936 |
+
"step": 22450
|
| 78937 |
+
},
|
| 78938 |
+
{
|
| 78939 |
+
"epoch": 0.998265973055889,
|
| 78940 |
+
"grad_norm": 0.05717792361974716,
|
| 78941 |
+
"learning_rate": 7.714947933556982e-09,
|
| 78942 |
+
"loss": 1.4445,
|
| 78943 |
+
"step": 22452
|
| 78944 |
+
},
|
| 78945 |
+
{
|
| 78946 |
+
"epoch": 0.9983548975145614,
|
| 78947 |
+
"grad_norm": 0.05846971645951271,
|
| 78948 |
+
"learning_rate": 6.924221023052457e-09,
|
| 78949 |
+
"loss": 1.4477,
|
| 78950 |
+
"step": 22454
|
| 78951 |
+
},
|
| 78952 |
+
{
|
| 78953 |
+
"epoch": 0.9984438219732338,
|
| 78954 |
+
"grad_norm": 0.05766315013170242,
|
| 78955 |
+
"learning_rate": 6.176235724075774e-09,
|
| 78956 |
+
"loss": 1.4465,
|
| 78957 |
+
"step": 22456
|
| 78958 |
+
},
|
| 78959 |
+
{
|
| 78960 |
+
"epoch": 0.9985327464319061,
|
| 78961 |
+
"grad_norm": 0.058049630373716354,
|
| 78962 |
+
"learning_rate": 5.470992100520267e-09,
|
| 78963 |
+
"loss": 1.4475,
|
| 78964 |
+
"step": 22458
|
| 78965 |
+
},
|
| 78966 |
+
{
|
| 78967 |
+
"epoch": 0.9986216708905784,
|
| 78968 |
+
"grad_norm": 0.057937148958444595,
|
| 78969 |
+
"learning_rate": 4.808490212726557e-09,
|
| 78970 |
+
"loss": 1.4404,
|
| 78971 |
+
"step": 22460
|
| 78972 |
+
},
|
| 78973 |
+
{
|
| 78974 |
+
"epoch": 0.9987105953492508,
|
| 78975 |
+
"grad_norm": 0.05762643739581108,
|
| 78976 |
+
"learning_rate": 4.18873011731602e-09,
|
| 78977 |
+
"loss": 1.446,
|
| 78978 |
+
"step": 22462
|
| 78979 |
+
},
|
| 78980 |
+
{
|
| 78981 |
+
"epoch": 0.9987995198079231,
|
| 78982 |
+
"grad_norm": 0.05884932354092598,
|
| 78983 |
+
"learning_rate": 3.611711867246292e-09,
|
| 78984 |
+
"loss": 1.4505,
|
| 78985 |
+
"step": 22464
|
| 78986 |
+
},
|
| 78987 |
+
{
|
| 78988 |
+
"epoch": 0.9988884442665955,
|
| 78989 |
+
"grad_norm": 0.05755854770541191,
|
| 78990 |
+
"learning_rate": 3.0774355119222996e-09,
|
| 78991 |
+
"loss": 1.4517,
|
| 78992 |
+
"step": 22466
|
| 78993 |
+
},
|
| 78994 |
+
{
|
| 78995 |
+
"epoch": 0.9989773687252679,
|
| 78996 |
+
"grad_norm": 0.05807171016931534,
|
| 78997 |
+
"learning_rate": 2.585901096863186e-09,
|
| 78998 |
+
"loss": 1.4417,
|
| 78999 |
+
"step": 22468
|
| 79000 |
+
},
|
| 79001 |
+
{
|
| 79002 |
+
"epoch": 0.9990662931839402,
|
| 79003 |
+
"grad_norm": 0.057942621409893036,
|
| 79004 |
+
"learning_rate": 2.1371086642574255e-09,
|
| 79005 |
+
"loss": 1.4481,
|
| 79006 |
+
"step": 22470
|
| 79007 |
+
},
|
| 79008 |
+
{
|
| 79009 |
+
"epoch": 0.9991552176426126,
|
| 79010 |
+
"grad_norm": 0.05796192213892937,
|
| 79011 |
+
"learning_rate": 1.7310582523522023e-09,
|
| 79012 |
+
"loss": 1.4428,
|
| 79013 |
+
"step": 22472
|
| 79014 |
+
},
|
| 79015 |
+
{
|
| 79016 |
+
"epoch": 0.999244142101285,
|
| 79017 |
+
"grad_norm": 0.05767205357551575,
|
| 79018 |
+
"learning_rate": 1.3677498958974966e-09,
|
| 79019 |
+
"loss": 1.448,
|
| 79020 |
+
"step": 22474
|
| 79021 |
+
},
|
| 79022 |
+
{
|
| 79023 |
+
"epoch": 0.9993330665599574,
|
| 79024 |
+
"grad_norm": 0.05703622102737427,
|
| 79025 |
+
"learning_rate": 1.047183625924042e-09,
|
| 79026 |
+
"loss": 1.4464,
|
| 79027 |
+
"step": 22476
|
| 79028 |
+
},
|
| 79029 |
+
{
|
| 79030 |
+
"epoch": 0.9994219910186297,
|
| 79031 |
+
"grad_norm": 0.05870012938976288,
|
| 79032 |
+
"learning_rate": 7.693594699098582e-10,
|
| 79033 |
+
"loss": 1.4521,
|
| 79034 |
+
"step": 22478
|
| 79035 |
+
},
|
| 79036 |
+
{
|
| 79037 |
+
"epoch": 0.9995109154773021,
|
| 79038 |
+
"grad_norm": 0.057245586067438126,
|
| 79039 |
+
"learning_rate": 5.342774515026961e-10,
|
| 79040 |
+
"loss": 1.4448,
|
| 79041 |
+
"step": 22480
|
| 79042 |
+
},
|
| 79043 |
+
{
|
| 79044 |
+
"epoch": 0.9995998399359743,
|
| 79045 |
+
"grad_norm": 0.05662744492292404,
|
| 79046 |
+
"learning_rate": 3.419375909086142e-10,
|
| 79047 |
+
"loss": 1.4334,
|
| 79048 |
+
"step": 22482
|
| 79049 |
+
},
|
| 79050 |
+
{
|
| 79051 |
+
"epoch": 0.9996887643946467,
|
| 79052 |
+
"grad_norm": 0.05766800418496132,
|
| 79053 |
+
"learning_rate": 1.923399044478913e-10,
|
| 79054 |
+
"loss": 1.4482,
|
| 79055 |
+
"step": 22484
|
| 79056 |
+
},
|
| 79057 |
+
{
|
| 79058 |
+
"epoch": 0.9997776888533191,
|
| 79059 |
+
"grad_norm": 0.058209143579006195,
|
| 79060 |
+
"learning_rate": 8.548440505462551e-11,
|
| 79061 |
+
"loss": 1.4435,
|
| 79062 |
+
"step": 22486
|
| 79063 |
+
},
|
| 79064 |
+
{
|
| 79065 |
+
"epoch": 0.9998666133119914,
|
| 79066 |
+
"grad_norm": 0.05729079991579056,
|
| 79067 |
+
"learning_rate": 2.1371101721623376e-11,
|
| 79068 |
+
"loss": 1.4459,
|
| 79069 |
+
"step": 22488
|
| 79070 |
+
},
|
| 79071 |
+
{
|
| 79072 |
+
"epoch": 0.9999555377706638,
|
| 79073 |
+
"grad_norm": 0.057013168931007385,
|
| 79074 |
+
"learning_rate": 0.0,
|
| 79075 |
+
"loss": 1.4473,
|
| 79076 |
+
"step": 22490
|
| 79077 |
}
|
| 79078 |
],
|
| 79079 |
"logging_steps": 2,
|
|
|
|
| 79088 |
"should_evaluate": false,
|
| 79089 |
"should_log": false,
|
| 79090 |
"should_save": true,
|
| 79091 |
+
"should_training_stop": true
|
| 79092 |
},
|
| 79093 |
"attributes": {}
|
| 79094 |
}
|
| 79095 |
},
|
| 79096 |
+
"total_flos": 4.812454399322358e+19,
|
| 79097 |
"train_batch_size": 768,
|
| 79098 |
"trial_name": null,
|
| 79099 |
"trial_params": null
|