Fanucci
committed on
Training in progress, step 1400, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5210b8e7a636162c39378a8939d4c31c398430ff7e09e1eff0febb1fb0faf8f6
 size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7af4714670ec3d95954fb016cb9b1f5d2c80a1f74089d52ff0460d7ffed9b6f4
 size 1342555602
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:dd5cd67dee7267dcb537fb31aede3e18d0e3e45fa623251f8fd6ad78c9019d93
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b24b043640c4c0fec08278af8607ef0c8db96b29dad4d3ed04b5e555adfd0924
 size 1064
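Each of the four files above is stored with Git LFS, so the commit changes only the pointer file: a spec version line, the SHA-256 of the blob, and its size in bytes. A minimal sketch of checking a downloaded blob against such a pointer (the file names in the example call are illustrative, not part of this commit):

```python
import hashlib

def parse_lfs_pointer(path):
    """Parse a Git LFS pointer file into a dict of its key/value fields."""
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify_blob(pointer_path, blob_path):
    """Check a blob's SHA-256 and size against the oid/size in its pointer."""
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    h = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        # Stream in 1 MiB chunks so large checkpoints don't load into memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest() == expected_oid and size == expected_size

# e.g. verify_blob("adapter_model.safetensors.pointer", "adapter_model.safetensors")
```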
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 3.1351470947265625,
   "best_model_checkpoint": "miner_id_24/checkpoint-600",
-  "epoch": 0.
+  "epoch": 0.4479283314669653,
   "eval_steps": 200,
-  "global_step":
+  "global_step": 1400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -8463,6 +8463,1414 @@
       "eval_samples_per_second": 5.635,
       "eval_steps_per_second": 1.409,
       "step": 1200
+    },
+    { "epoch": 0.3842585186370181, "grad_norm": 27.103090286254883, "learning_rate": 0.0002740839244415924, "loss": 6.8103, "step": 1201 },
+    { "epoch": 0.3845784674452088, "grad_norm": 11.38796615600586, "learning_rate": 0.00027389965247590016, "loss": 6.7159, "step": 1202 },
+    { "epoch": 0.38489841625339943, "grad_norm": 9.166263580322266, "learning_rate": 0.0002737153127150736, "loss": 6.5407, "step": 1203 },
+    { "epoch": 0.38521836506159013, "grad_norm": 10.899460792541504, "learning_rate": 0.00027353090535469065, "loss": 6.6115, "step": 1204 },
+    { "epoch": 0.3855383138697808, "grad_norm": 12.029467582702637, "learning_rate": 0.00027334643059040035, "loss": 6.6065, "step": 1205 },
+    { "epoch": 0.3858582626779715, "grad_norm": 8.318588256835938, "learning_rate": 0.0002731618886179235, "loss": 6.8032, "step": 1206 },
+    { "epoch": 0.3861782114861622, "grad_norm": 8.223920822143555, "learning_rate": 0.00027297727963305227, "loss": 6.5092, "step": 1207 },
+    { "epoch": 0.3864981602943529, "grad_norm": 22.681262969970703, "learning_rate": 0.0002727926038316499, "loss": 6.8623, "step": 1208 },
+    { "epoch": 0.3868181091025436, "grad_norm": 10.376825332641602, "learning_rate": 0.0002726078614096504, "loss": 6.6216, "step": 1209 },
+    { "epoch": 0.3871380579107343, "grad_norm": 5.749302864074707, "learning_rate": 0.0002724230525630586, "loss": 6.6049, "step": 1210 },
+    { "epoch": 0.387458006718925, "grad_norm": 8.080632209777832, "learning_rate": 0.00027223817748794985, "loss": 6.6569, "step": 1211 },
+    { "epoch": 0.38777795552711564, "grad_norm": 12.558454513549805, "learning_rate": 0.00027205323638046947, "loss": 6.3948, "step": 1212 },
+    { "epoch": 0.38809790433530633, "grad_norm": 9.190141677856445, "learning_rate": 0.0002718682294368331, "loss": 6.6437, "step": 1213 },
+    { "epoch": 0.388417853143497, "grad_norm": 4.570112228393555, "learning_rate": 0.00027168315685332633, "loss": 6.5194, "step": 1214 },
+    { "epoch": 0.3887378019516877, "grad_norm": 9.28131103515625, "learning_rate": 0.0002714980188263041, "loss": 6.5532, "step": 1215 },
+    { "epoch": 0.3890577507598784, "grad_norm": 11.132205963134766, "learning_rate": 0.00027131281555219084, "loss": 6.6146, "step": 1216 },
+    { "epoch": 0.3893776995680691, "grad_norm": 7.617869853973389, "learning_rate": 0.00027112754722748037, "loss": 6.5418, "step": 1217 },
+    { "epoch": 0.3896976483762598, "grad_norm": 6.569556713104248, "learning_rate": 0.00027094221404873537, "loss": 6.5742, "step": 1218 },
+    { "epoch": 0.3900175971844505, "grad_norm": 9.705401420593262, "learning_rate": 0.0002707568162125875, "loss": 6.5249, "step": 1219 },
+    { "epoch": 0.3903375459926412, "grad_norm": 8.43759822845459, "learning_rate": 0.00027057135391573683, "loss": 6.7944, "step": 1220 },
+    { "epoch": 0.39065749480083184, "grad_norm": 9.743579864501953, "learning_rate": 0.00027038582735495196, "loss": 6.6869, "step": 1221 },
+    { "epoch": 0.39097744360902253, "grad_norm": 9.03455638885498, "learning_rate": 0.0002702002367270695, "loss": 6.4196, "step": 1222 },
+    { "epoch": 0.39129739241721323, "grad_norm": 11.69813060760498, "learning_rate": 0.0002700145822289942, "loss": 6.5824, "step": 1223 },
+    { "epoch": 0.3916173412254039, "grad_norm": 11.977036476135254, "learning_rate": 0.00026982886405769855, "loss": 6.8119, "step": 1224 },
+    { "epoch": 0.3919372900335946, "grad_norm": 7.943235874176025, "learning_rate": 0.00026964308241022255, "loss": 6.6036, "step": 1225 },
+    { "epoch": 0.3922572388417853, "grad_norm": 5.831643581390381, "learning_rate": 0.00026945723748367353, "loss": 6.747, "step": 1226 },
+    { "epoch": 0.392577187649976, "grad_norm": 13.803399085998535, "learning_rate": 0.00026927132947522604, "loss": 6.6117, "step": 1227 },
+    { "epoch": 0.3928971364581667, "grad_norm": 8.283905029296875, "learning_rate": 0.0002690853585821214, "loss": 6.6316, "step": 1228 },
+    { "epoch": 0.3932170852663574, "grad_norm": 8.696572303771973, "learning_rate": 0.00026889932500166785, "loss": 6.6446, "step": 1229 },
+    { "epoch": 0.3935370340745481, "grad_norm": 4.740365982055664, "learning_rate": 0.00026871322893124, "loss": 6.6858, "step": 1230 },
+    { "epoch": 0.39385698288273874, "grad_norm": 11.38948917388916, "learning_rate": 0.0002685270705682788, "loss": 6.748, "step": 1231 },
+    { "epoch": 0.39417693169092943, "grad_norm": 9.747567176818848, "learning_rate": 0.00026834085011029135, "loss": 6.4157, "step": 1232 },
+    { "epoch": 0.39449688049912013, "grad_norm": 7.497620582580566, "learning_rate": 0.0002681545677548505, "loss": 6.6276, "step": 1233 },
+    { "epoch": 0.3948168293073108, "grad_norm": 10.829628944396973, "learning_rate": 0.0002679682236995948, "loss": 6.6541, "step": 1234 },
+    { "epoch": 0.3951367781155015, "grad_norm": 6.678272247314453, "learning_rate": 0.0002677818181422284, "loss": 6.4207, "step": 1235 },
+    { "epoch": 0.3954567269236922, "grad_norm": 6.341800689697266, "learning_rate": 0.0002675953512805206, "loss": 6.695, "step": 1236 },
+    { "epoch": 0.3957766757318829, "grad_norm": 7.859994888305664, "learning_rate": 0.0002674088233123056, "loss": 6.3617, "step": 1237 },
+    { "epoch": 0.3960966245400736, "grad_norm": 10.069356918334961, "learning_rate": 0.0002672222344354828, "loss": 6.7558, "step": 1238 },
+    { "epoch": 0.3964165733482643, "grad_norm": 14.660593032836914, "learning_rate": 0.0002670355848480158, "loss": 6.6104, "step": 1239 },
+    { "epoch": 0.39673652215645494, "grad_norm": 5.288352012634277, "learning_rate": 0.000266848874747933, "loss": 6.5524, "step": 1240 },
+    { "epoch": 0.39705647096464564, "grad_norm": 6.08315372467041, "learning_rate": 0.0002666621043333266, "loss": 6.2126, "step": 1241 },
+    { "epoch": 0.39737641977283633, "grad_norm": 6.1647162437438965, "learning_rate": 0.00026647527380235314, "loss": 6.6435, "step": 1242 },
+    { "epoch": 0.397696368581027, "grad_norm": 5.414215087890625, "learning_rate": 0.0002662883833532328, "loss": 6.2672, "step": 1243 },
+    { "epoch": 0.3980163173892177, "grad_norm": 5.315961837768555, "learning_rate": 0.00026610143318424925, "loss": 6.3607, "step": 1244 },
+    { "epoch": 0.3983362661974084, "grad_norm": 5.834237098693848, "learning_rate": 0.0002659144234937497, "loss": 6.558, "step": 1245 },
+    { "epoch": 0.3986562150055991, "grad_norm": 9.148299217224121, "learning_rate": 0.0002657273544801444, "loss": 6.4416, "step": 1246 },
+    { "epoch": 0.3989761638137898, "grad_norm": 10.394495010375977, "learning_rate": 0.0002655402263419065, "loss": 6.5473, "step": 1247 },
+    { "epoch": 0.3992961126219805, "grad_norm": 9.015913009643555, "learning_rate": 0.000265353039277572, "loss": 6.4597, "step": 1248 },
+    { "epoch": 0.3996160614301712, "grad_norm": 10.161762237548828, "learning_rate": 0.00026516579348573934, "loss": 6.5067, "step": 1249 },
+    { "epoch": 0.39993601023836184, "grad_norm": 6.956129550933838, "learning_rate": 0.00026497848916506926, "loss": 6.4775, "step": 1250 },
+    { "epoch": 0.40025595904655253, "grad_norm": 5.3303937911987305, "learning_rate": 0.0002647911265142846, "loss": 6.4659, "step": 1251 },
+    { "epoch": 0.40057590785474323, "grad_norm": 5.570010662078857, "learning_rate": 0.00026460370573217016, "loss": 6.4517, "step": 1252 },
+    { "epoch": 0.4008958566629339, "grad_norm": 6.759359836578369, "learning_rate": 0.0002644162270175723, "loss": 6.4963, "step": 1253 },
+    { "epoch": 0.4012158054711246, "grad_norm": 14.08566665649414, "learning_rate": 0.0002642286905693989, "loss": 6.6086, "step": 1254 },
+    { "epoch": 0.4015357542793153, "grad_norm": 6.671782493591309, "learning_rate": 0.0002640410965866192, "loss": 6.3949, "step": 1255 },
+    { "epoch": 0.401855703087506, "grad_norm": 9.904322624206543, "learning_rate": 0.0002638534452682632, "loss": 6.5513, "step": 1256 },
+    { "epoch": 0.4021756518956967, "grad_norm": 11.320886611938477, "learning_rate": 0.00026366573681342213, "loss": 6.4079, "step": 1257 },
+    { "epoch": 0.4024956007038874, "grad_norm": 5.8289666175842285, "learning_rate": 0.00026347797142124745, "loss": 6.3216, "step": 1258 },
+    { "epoch": 0.40281554951207804, "grad_norm": 13.474091529846191, "learning_rate": 0.0002632901492909513, "loss": 6.4256, "step": 1259 },
+    { "epoch": 0.40313549832026874, "grad_norm": 16.653573989868164, "learning_rate": 0.0002631022706218058, "loss": 6.7427, "step": 1260 },
+    { "epoch": 0.40345544712845943, "grad_norm": 9.67142391204834, "learning_rate": 0.00026291433561314323, "loss": 6.5105, "step": 1261 },
+    { "epoch": 0.4037753959366501, "grad_norm": 7.529284477233887, "learning_rate": 0.0002627263444643557, "loss": 6.4653, "step": 1262 },
+    { "epoch": 0.4040953447448408, "grad_norm": 9.487723350524902, "learning_rate": 0.00026253829737489455, "loss": 6.2462, "step": 1263 },
+    { "epoch": 0.4044152935530315, "grad_norm": 8.2636079788208, "learning_rate": 0.0002623501945442708, "loss": 6.3761, "step": 1264 },
+    { "epoch": 0.4047352423612222, "grad_norm": 5.9740471839904785, "learning_rate": 0.00026216203617205453, "loss": 6.3738, "step": 1265 },
+    { "epoch": 0.4050551911694129, "grad_norm": 7.923804759979248, "learning_rate": 0.0002619738224578746, "loss": 6.1306, "step": 1266 },
+    { "epoch": 0.4053751399776036, "grad_norm": 6.172807216644287, "learning_rate": 0.0002617855536014188, "loss": 6.4058, "step": 1267 },
+    { "epoch": 0.4056950887857943, "grad_norm": 10.529424667358398, "learning_rate": 0.0002615972298024334, "loss": 6.286, "step": 1268 },
+    { "epoch": 0.40601503759398494, "grad_norm": 7.2672953605651855, "learning_rate": 0.0002614088512607227, "loss": 6.3429, "step": 1269 },
+    { "epoch": 0.40633498640217564, "grad_norm": 12.593116760253906, "learning_rate": 0.0002612204181761493, "loss": 6.5201, "step": 1270 },
+    { "epoch": 0.40665493521036633, "grad_norm": 12.00728988647461, "learning_rate": 0.00026103193074863377, "loss": 6.3686, "step": 1271 },
+    { "epoch": 0.406974884018557, "grad_norm": 6.9003777503967285, "learning_rate": 0.0002608433891781541, "loss": 6.3091, "step": 1272 },
+    { "epoch": 0.4072948328267477, "grad_norm": 11.43606948852539, "learning_rate": 0.0002606547936647458, "loss": 6.4177, "step": 1273 },
+    { "epoch": 0.4076147816349384, "grad_norm": 8.18825912475586, "learning_rate": 0.0002604661444085017, "loss": 6.379, "step": 1274 },
+    { "epoch": 0.4079347304431291, "grad_norm": 9.621562957763672, "learning_rate": 0.0002602774416095715, "loss": 6.4082, "step": 1275 },
+    { "epoch": 0.4082546792513198, "grad_norm": 10.449783325195312, "learning_rate": 0.000260088685468162, "loss": 6.2317, "step": 1276 },
+    { "epoch": 0.4085746280595105, "grad_norm": 14.534072875976562, "learning_rate": 0.0002598998761845361, "loss": 6.5101, "step": 1277 },
+    { "epoch": 0.40889457686770114, "grad_norm": 19.77684211730957, "learning_rate": 0.0002597110139590135, "loss": 6.4038, "step": 1278 },
+    { "epoch": 0.40921452567589184, "grad_norm": 9.12231159210205, "learning_rate": 0.00025952209899197, "loss": 6.2977, "step": 1279 },
+    { "epoch": 0.40953447448408253, "grad_norm": 9.007134437561035, "learning_rate": 0.0002593331314838372, "loss": 6.6574, "step": 1280 },
+    { "epoch": 0.40985442329227323, "grad_norm": 12.240474700927734, "learning_rate": 0.0002591441116351025, "loss": 6.6026, "step": 1281 },
+    { "epoch": 0.4101743721004639, "grad_norm": 18.05267333984375, "learning_rate": 0.000258955039646309, "loss": 6.2505, "step": 1282 },
+    { "epoch": 0.4104943209086546, "grad_norm": 12.290742874145508, "learning_rate": 0.000258765915718055, "loss": 6.5617, "step": 1283 },
+    { "epoch": 0.4108142697168453, "grad_norm": 15.071090698242188, "learning_rate": 0.0002585767400509937, "loss": 6.5613, "step": 1284 },
+    { "epoch": 0.411134218525036, "grad_norm": 8.901190757751465, "learning_rate": 0.00025838751284583346, "loss": 6.3584, "step": 1285 },
+    { "epoch": 0.4114541673332267, "grad_norm": 11.258878707885742, "learning_rate": 0.0002581982343033374, "loss": 6.5876, "step": 1286 },
+    { "epoch": 0.41177411614141735, "grad_norm": 9.922440528869629, "learning_rate": 0.00025800890462432277, "loss": 6.2798, "step": 1287 },
+    { "epoch": 0.41209406494960804, "grad_norm": 7.8548150062561035, "learning_rate": 0.0002578195240096614, "loss": 6.4563, "step": 1288 },
+    { "epoch": 0.41241401375779874, "grad_norm": 9.597755432128906, "learning_rate": 0.0002576300926602788, "loss": 6.2798, "step": 1289 },
+    { "epoch": 0.41273396256598943, "grad_norm": 5.551302433013916, "learning_rate": 0.0002574406107771548, "loss": 6.3571, "step": 1290 },
+    { "epoch": 0.4130539113741801, "grad_norm": 9.343033790588379, "learning_rate": 0.0002572510785613225, "loss": 6.375, "step": 1291 },
+    { "epoch": 0.4133738601823708, "grad_norm": 6.203455924987793, "learning_rate": 0.0002570614962138682, "loss": 6.032, "step": 1292 },
+    { "epoch": 0.4136938089905615, "grad_norm": 7.929701328277588, "learning_rate": 0.00025687186393593206, "loss": 6.3534, "step": 1293 },
+    { "epoch": 0.4140137577987522, "grad_norm": 12.085379600524902, "learning_rate": 0.0002566821819287065, "loss": 6.4062, "step": 1294 },
+    { "epoch": 0.4143337066069429, "grad_norm": 8.04161262512207, "learning_rate": 0.0002564924503934372, "loss": 6.4253, "step": 1295 },
+    { "epoch": 0.4146536554151336, "grad_norm": 11.36021614074707, "learning_rate": 0.00025630266953142214, "loss": 6.1811, "step": 1296 },
+    { "epoch": 0.41497360422332424, "grad_norm": 12.349037170410156, "learning_rate": 0.00025611283954401175, "loss": 6.4346, "step": 1297 },
+    { "epoch": 0.41529355303151494, "grad_norm": 11.792349815368652, "learning_rate": 0.00025592296063260835, "loss": 6.4392, "step": 1298 },
+    { "epoch": 0.41561350183970563, "grad_norm": 9.208128929138184, "learning_rate": 0.00025573303299866653, "loss": 6.3419, "step": 1299 },
+    { "epoch": 0.41593345064789633, "grad_norm": 14.85993766784668, "learning_rate": 0.0002555430568436923, "loss": 6.3156, "step": 1300 },
+    { "epoch": 0.416253399456087, "grad_norm": 14.815731048583984, "learning_rate": 0.0002553530323692432, "loss": 6.3872, "step": 1301 },
+    { "epoch": 0.4165733482642777, "grad_norm": 9.829910278320312, "learning_rate": 0.0002551629597769282, "loss": 6.4641, "step": 1302 },
+    { "epoch": 0.4168932970724684, "grad_norm": 16.248035430908203, "learning_rate": 0.000254972839268407, "loss": 6.2669, "step": 1303 },
+    { "epoch": 0.4172132458806591, "grad_norm": 11.95917797088623, "learning_rate": 0.00025478267104539053, "loss": 6.4028, "step": 1304 },
+    { "epoch": 0.4175331946888498, "grad_norm": 10.625663757324219, "learning_rate": 0.00025459245530964, "loss": 6.4577, "step": 1305 },
+    { "epoch": 0.41785314349704045, "grad_norm": 6.940323352813721, "learning_rate": 0.00025440219226296725, "loss": 6.3556, "step": 1306 },
+    { "epoch": 0.41817309230523114, "grad_norm": 10.21389102935791, "learning_rate": 0.0002542118821072342, "loss": 6.4131, "step": 1307 },
+    { "epoch": 0.41849304111342184, "grad_norm": 13.550383567810059, "learning_rate": 0.0002540215250443528, "loss": 6.4616, "step": 1308 },
+    { "epoch": 0.41881298992161253, "grad_norm": 9.647680282592773, "learning_rate": 0.0002538311212762847, "loss": 6.0977, "step": 1309 },
+    { "epoch": 0.41913293872980323, "grad_norm": 10.69679069519043, "learning_rate": 0.0002536406710050412, "loss": 6.1197, "step": 1310 },
+    { "epoch": 0.4194528875379939, "grad_norm": 15.03259563446045, "learning_rate": 0.0002534501744326829, "loss": 6.497, "step": 1311 },
+    { "epoch": 0.4197728363461846, "grad_norm": 14.628292083740234, "learning_rate": 0.00025325963176131946, "loss": 6.3991, "step": 1312 },
+    { "epoch": 0.4200927851543753, "grad_norm": 13.270496368408203, "learning_rate": 0.0002530690431931096, "loss": 6.7255, "step": 1313 },
+    { "epoch": 0.420412733962566, "grad_norm": 7.5467143058776855, "learning_rate": 0.00025287840893026064, "loss": 6.0528, "step": 1314 },
+    { "epoch": 0.4207326827707567, "grad_norm": 9.551958084106445, "learning_rate": 0.0002526877291750283, "loss": 6.1203, "step": 1315 },
+    { "epoch": 0.42105263157894735, "grad_norm": 9.613594055175781, "learning_rate": 0.0002524970041297166, "loss": 6.3185, "step": 1316 },
+    { "epoch": 0.42137258038713804, "grad_norm": 16.20762825012207, "learning_rate": 0.00025230623399667777, "loss": 6.2841, "step": 1317 },
+    { "epoch": 0.42169252919532874, "grad_norm": 19.544544219970703, "learning_rate": 0.0002521154189783118, "loss": 6.2749, "step": 1318 },
+    { "epoch": 0.42201247800351943, "grad_norm": 30.432388305664062, "learning_rate": 0.00025192455927706617, "loss": 6.417, "step": 1319 },
+    { "epoch": 0.4223324268117101, "grad_norm": 12.61909294128418, "learning_rate": 0.0002517336550954359, "loss": 6.5085, "step": 1320 },
+    { "epoch": 0.4226523756199008, "grad_norm": 12.66275691986084, "learning_rate": 0.0002515427066359632, "loss": 6.239, "step": 1321 },
+    { "epoch": 0.4229723244280915, "grad_norm": 32.47134780883789, "learning_rate": 0.0002513517141012371, "loss": 6.5225, "step": 1322 },
+    { "epoch": 0.4232922732362822, "grad_norm": 15.030749320983887, "learning_rate": 0.0002511606776938936, "loss": 6.2803, "step": 1323 },
+    { "epoch": 0.4236122220444729, "grad_norm": 10.074403762817383, "learning_rate": 0.00025096959761661524, "loss": 6.3504, "step": 1324 },
+    { "epoch": 0.42393217085266355, "grad_norm": 13.217743873596191, "learning_rate": 0.0002507784740721306, "loss": 6.2698, "step": 1325 },
+    { "epoch": 0.42425211966085424, "grad_norm": 16.90913200378418, "learning_rate": 0.0002505873072632148, "loss": 6.2857, "step": 1326 },
+    { "epoch": 0.42457206846904494, "grad_norm": 17.0783634185791, "learning_rate": 0.0002503960973926886, "loss": 6.2195, "step": 1327 },
+    { "epoch": 0.42489201727723563, "grad_norm": 7.002859115600586, "learning_rate": 0.00025020484466341844, "loss": 6.2902, "step": 1328 },
+    { "epoch": 0.42521196608542633, "grad_norm": 14.14289379119873, "learning_rate": 0.0002500135492783163, "loss": 6.3848, "step": 1329 },
+    { "epoch": 0.425531914893617, "grad_norm": 9.69235897064209, "learning_rate": 0.0002498222114403395, "loss": 6.1554, "step": 1330 },
+    { "epoch": 0.4258518637018077, "grad_norm": 30.017566680908203, "learning_rate": 0.0002496308313524902, "loss": 6.3937, "step": 1331 },
+    { "epoch": 0.4261718125099984, "grad_norm": 13.781780242919922, "learning_rate": 0.00024943940921781557, "loss": 6.1807, "step": 1332 },
+    { "epoch": 0.4264917613181891, "grad_norm": 10.11452579498291, "learning_rate": 0.0002492479452394072, "loss": 6.385, "step": 1333 },
+    { "epoch": 0.4268117101263798, "grad_norm": 18.071516036987305, "learning_rate": 0.00024905643962040133, "loss": 6.35, "step": 1334 },
+    { "epoch": 0.42713165893457045, "grad_norm": 12.902596473693848, "learning_rate": 0.00024886489256397825, "loss": 6.4579, "step": 1335 },
+    { "epoch": 0.42745160774276114, "grad_norm": 9.532163619995117, "learning_rate": 0.000248673304273362, "loss": 6.2096, "step": 1336 },
+    { "epoch": 0.42777155655095184, "grad_norm": 20.802745819091797, "learning_rate": 0.0002484816749518207, "loss": 6.4637, "step": 1337 },
+    { "epoch": 0.42809150535914253, "grad_norm": 11.478161811828613, "learning_rate": 0.00024829000480266594, "loss": 6.3374, "step": 1338 },
+    { "epoch": 0.4284114541673332, "grad_norm": 8.678462028503418, "learning_rate": 0.0002480982940292524, "loss": 6.2866, "step": 1339 },
+    { "epoch": 0.4287314029755239, "grad_norm": 9.180252075195312, "learning_rate": 0.0002479065428349782, "loss": 6.3384, "step": 1340 },
+    { "epoch": 0.4290513517837146, "grad_norm": 11.826372146606445, "learning_rate": 0.00024771475142328406, "loss": 6.3178, "step": 1341 },
+    { "epoch": 0.4293713005919053, "grad_norm": 8.149714469909668, "learning_rate": 0.00024752291999765344, "loss": 6.4101, "step": 1342 },
+    { "epoch": 0.429691249400096, "grad_norm": 8.667763710021973, "learning_rate": 0.0002473310487616123, "loss": 6.2569, "step": 1343 },
+    { "epoch": 0.43001119820828665, "grad_norm": 8.507345199584961, "learning_rate": 0.00024713913791872896, "loss": 6.0874, "step": 1344 },
+    { "epoch": 0.43033114701647734, "grad_norm": 12.065765380859375, "learning_rate": 0.00024694718767261336, "loss": 6.2609, "step": 1345 },
+    { "epoch": 0.43065109582466804, "grad_norm": 7.034212112426758, "learning_rate": 0.00024675519822691777, "loss": 6.3743, "step": 1346 },
+    { "epoch": 0.43097104463285874, "grad_norm": 8.030159950256348, "learning_rate": 0.0002465631697853357, "loss": 6.1686, "step": 1347 },
+    { "epoch": 0.43129099344104943, "grad_norm": 36.66036605834961, "learning_rate": 0.00024637110255160203, "loss": 6.2742, "step": 1348 },
+    { "epoch": 0.4316109422492401, "grad_norm": 12.500212669372559, "learning_rate": 0.00024617899672949305, "loss": 6.1638, "step": 1349 },
+    { "epoch": 0.4319308910574308, "grad_norm": 20.530799865722656, "learning_rate": 0.0002459868525228257, "loss": 6.5203, "step": 1350 },
+    { "epoch": 0.4322508398656215, "grad_norm": 6.235330104827881, "learning_rate": 0.0002457946701354578, "loss": 6.2691, "step": 1351 },
+    { "epoch": 0.4325707886738122, "grad_norm": 10.153731346130371, "learning_rate": 0.00024560244977128774, "loss": 6.3439, "step": 1352 },
+    { "epoch": 0.43289073748200285, "grad_norm": 9.818161964416504, "learning_rate": 0.000245410191634254, "loss": 6.1459, "step": 1353 },
+    { "epoch": 0.43321068629019355, "grad_norm": 16.71061134338379, "learning_rate": 0.0002452178959283353, "loss": 6.4604, "step": 1354 },
+    { "epoch": 0.43353063509838424, "grad_norm": 9.193108558654785, "learning_rate": 0.00024502556285755023, "loss": 6.3588, "step": 1355 },
+    { "epoch": 0.43385058390657494, "grad_norm": 9.433093070983887, "learning_rate": 0.00024483319262595687, "loss": 6.4425, "step": 1356 },
+    { "epoch": 0.43417053271476563, "grad_norm": 8.100132942199707, "learning_rate": 0.0002446407854376529, "loss": 6.434, "step": 1357 },
+    { "epoch": 0.43449048152295633, "grad_norm": 8.084160804748535, "learning_rate": 0.00024444834149677506, "loss": 6.4744, "step": 1358 },
+    { "epoch": 0.434810430331147, "grad_norm": 7.267811298370361, "learning_rate": 0.00024425586100749916, "loss": 6.0862, "step": 1359 },
+    { "epoch": 0.4351303791393377, "grad_norm": 6.129974365234375, "learning_rate": 0.0002440633441740398, "loss": 6.3388, "step": 1360 },
+    { "epoch": 0.4354503279475284, "grad_norm": 10.397089004516602, "learning_rate": 0.00024387079120065014, "loss": 6.349, "step": 1361 },
+    { "epoch": 0.4357702767557191, "grad_norm": 10.434456825256348, "learning_rate": 0.00024367820229162157, "loss": 6.1166, "step": 1362 },
+    { "epoch": 0.43609022556390975, "grad_norm": 8.7677001953125, "learning_rate": 0.00024348557765128384, "loss": 6.2898, "step": 1363 },
+    { "epoch": 0.43641017437210045, "grad_norm": 11.238868713378906, "learning_rate": 0.0002432929174840044, "loss": 6.1991, "step": 1364 },
+    { "epoch": 0.43673012318029114, "grad_norm": 7.583499431610107, "learning_rate": 0.00024310022199418833, "loss": 6.0778, "step": 1365 },
+    { "epoch": 0.43705007198848184, "grad_norm": 11.78762149810791, "learning_rate": 0.0002429074913862786, "loss": 6.3973, "step": 1366 },
+    { "epoch": 0.43737002079667253, "grad_norm": 7.4696431159973145, "learning_rate": 0.0002427147258647549, "loss": 6.0472, "step": 1367 },
+    { "epoch": 0.4376899696048632, "grad_norm": 11.547538757324219, "learning_rate": 0.00024252192563413435, "loss": 6.4374, "step": 1368 },
+    { "epoch": 0.4380099184130539, "grad_norm": 11.28888988494873, "learning_rate": 0.00024232909089897065, "loss": 6.3143, "step": 1369 },
+    { "epoch": 0.4383298672212446, "grad_norm": 12.933130264282227, "learning_rate": 0.00024213622186385436, "loss": 6.26, "step": 1370 },
+    { "epoch": 0.4386498160294353, "grad_norm": 10.536492347717285, "learning_rate": 0.00024194331873341222, "loss": 6.2753, "step": 1371 },
+    { "epoch": 0.43896976483762595, "grad_norm": 18.023189544677734, "learning_rate": 0.00024175038171230718, "loss": 6.4572, "step": 1372 },
+    { "epoch": 0.43928971364581665, "grad_norm": 15.396516799926758, "learning_rate": 0.00024155741100523824, "loss": 6.0828, "step": 1373 },
+    { "epoch": 0.43960966245400734, "grad_norm": 12.426717758178711, "learning_rate": 0.00024136440681694007, "loss": 6.3676, "step": 1374 },
+    { "epoch": 0.43992961126219804, "grad_norm": 10.206119537353516, "learning_rate": 0.00024117136935218283, "loss": 6.1777, "step": 1375 },
+    { "epoch": 0.44024956007038873, "grad_norm": 17.221784591674805, "learning_rate": 0.00024097829881577205, "loss": 6.2916, "step": 1376 },
+    { "epoch": 0.44056950887857943, "grad_norm": 72.50157165527344, "learning_rate": 0.0002407851954125484, "loss": 6.234, "step": 1377 },
+    { "epoch": 0.4408894576867701, "grad_norm": 7.173483848571777, "learning_rate": 0.0002405920593473872, "loss": 6.2403, "step": 1378 },
+    { "epoch": 0.4412094064949608, "grad_norm": 7.698493480682373, "learning_rate": 0.0002403988908251988, "loss": 6.2424, "step": 1379 },
+    { "epoch": 0.4415293553031515, "grad_norm": 11.574590682983398, "learning_rate": 0.00024020569005092749, "loss": 6.2996, "step": 1380 },
+    { "epoch": 0.4418493041113422, "grad_norm": 10.622515678405762, "learning_rate": 0.00024001245722955216, "loss": 6.158, "step": 1381 },
+    { "epoch": 0.44216925291953285, "grad_norm": 11.94666576385498, "learning_rate": 0.00023981919256608564, "loss": 6.165, "step": 1382 },
+    { "epoch": 0.44248920172772355, "grad_norm": 10.147541046142578, "learning_rate": 0.00023962589626557446, "loss": 6.2964, "step": 1383 },
+    { "epoch": 0.44280915053591424, "grad_norm": 9.275951385498047, "learning_rate": 0.00023943256853309862, "loss": 6.3666, "step": 1384 },
+    { "epoch": 0.44312909934410494, "grad_norm": 12.353401184082031, "learning_rate": 0.0002392392095737718, "loss": 6.2231, "step": 1385 },
+    { "epoch": 0.44344904815229563, "grad_norm": 145.6978759765625, "learning_rate": 0.0002390458195927404, "loss": 6.0643, "step": 1386 },
+    { "epoch": 0.44376899696048633, "grad_norm": 9.182119369506836, "learning_rate": 0.00023885239879518406, "loss": 6.2337, "step": 1387 },
+    { "epoch": 0.444088945768677, "grad_norm": 10.039813995361328, "learning_rate": 0.000238658947386315, "loss": 6.3979, "step": 1388 },
+    { "epoch": 0.4444088945768677, "grad_norm": 8.057585716247559, "learning_rate": 0.00023846546557137782, "loss": 6.1908, "step": 1389 },
+    { "epoch": 0.4447288433850584, "grad_norm": 8.275229454040527, "learning_rate": 0.00023827195355564958, "loss": 6.4919, "step": 1390 },
+    { "epoch": 0.44504879219324905, "grad_norm": 14.329585075378418, "learning_rate": 0.00023807841154443912, "loss": 6.3344, "step": 1391 },
+    { "epoch": 0.44536874100143975, "grad_norm": 17.71234893798828, "learning_rate": 0.00023788483974308738, "loss": 6.1686, "step": 1392 },
+    { "epoch": 0.44568868980963045, "grad_norm": 17.175947189331055, "learning_rate": 0.00023769123835696676, "loss": 6.2478, "step": 1393 },
+    { "epoch": 0.44600863861782114, "grad_norm": 12.383498191833496, "learning_rate": 0.00023749760759148104, "loss": 6.2889, "step": 1394 },
+    { "epoch": 0.44632858742601184, "grad_norm": 26.6887149810791, "learning_rate": 0.0002373039476520651, "loss": 6.2837, "step": 1395 },
+    { "epoch": 0.44664853623420253, "grad_norm": 35.60121536254883, "learning_rate": 0.00023711025874418508, "loss": 6.2465, "step": 1396 },
+    { "epoch": 0.4469684850423932, "grad_norm": 26.76144790649414, "learning_rate": 0.00023691654107333755, "loss": 6.4341, "step": 1397 },
+    { "epoch": 0.4472884338505839, "grad_norm": 37.42619323730469, "learning_rate": 0.0002367227948450496, "loss": 6.4073, "step": 1398 },
+    { "epoch": 0.4476083826587746, "grad_norm": 24.048145294189453, "learning_rate": 0.00023652902026487883, "loss": 6.1317, "step": 1399 },
+    { "epoch": 0.4479283314669653, "grad_norm": 28.50050163269043, "learning_rate": 0.0002363352175384128, "loss": 6.2892, "step": 1400 },
+    { "epoch": 0.4479283314669653, "eval_loss": 3.1516573429107666, "eval_runtime": 233.5122, "eval_samples_per_second": 5.636, "eval_steps_per_second": 1.409, "step": 1400 }
   ],
   "logging_steps": 1,
@@ -8477,7 +9885,7 @@
       "early_stopping_threshold": 0.0
     },
     "attributes": {
-      "early_stopping_patience_counter":
+      "early_stopping_patience_counter": 4
     }
   },
   "TrainerControl": {
@@ -8491,7 +9899,7 @@
     "attributes": {}
   }
 },
-  "total_flos": 1.
+  "total_flos": 1.2911179536091054e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
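The updated state is consistent with the early-stopping bookkeeping above: `best_metric` 3.1351470947265625 was set at `checkpoint-600`, the step-1400 `eval_loss` of 3.1516573429107666 does not improve on it, and the patience counter now stands at 4. A hedged sketch of inspecting these fields from a checkpoint's `trainer_state.json` (the Hugging Face Trainer writes this file; the summary function below is illustrative, not part of this repository):

```python
import json

def summarize_trainer_state(path):
    """Print the headline fields the HF Trainer stores in trainer_state.json."""
    with open(path) as f:
        state = json.load(f)
    print(f"global_step: {state['global_step']}, epoch: {state['epoch']:.4f}")
    print(f"best_metric: {state['best_metric']} @ {state['best_model_checkpoint']}")
    # log_history holds one dict per logged step; eval entries carry eval_loss.
    evals = [e for e in state["log_history"] if "eval_loss" in e]
    for e in evals[-3:]:
        print(f"step {e['step']}: eval_loss {e['eval_loss']}")

# e.g. summarize_trainer_state("last-checkpoint/trainer_state.json")
```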