Training in progress, step 3400, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:611da9169e2fcbc35717df9b7b4777d9f26d62cbab3cc2de779aed5eb11c413a
 size 319876032
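The adapter_model.safetensors file above holds the trained adapter weights; only its Git LFS pointer (hash and size) changes in the diff. As an illustration only, and assuming this checkpoint is a PEFT/LoRA adapter for a causal language model (the base model is not recorded in this commit, so "base-model-name" below is a placeholder), a minimal loading sketch looks like:

# Sketch only: load the committed adapter on top of a base model with PEFT.
# "base-model-name" is a placeholder; the real base model is not named in this commit.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("base-model-name")
tokenizer = AutoTokenizer.from_pretrained("base-model-name")

# last-checkpoint/ also contains optimizer, scheduler, and RNG state for resuming.
model = PeftModel.from_pretrained(base, "last-checkpoint")
model.eval()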
last-checkpoint/optimizer.pt CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1e6611736d5c9821f935aabd0606bb4a67ac4897e810f5642cc5931295d788cb
 size 86009814
last-checkpoint/rng_state.pth CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e97d4d74e6de74991c9c8ced78eb80786b2e956c4ac33a1e9e263079ede360a1
 size 14244
last-checkpoint/scheduler.pt CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c96c404967946ed140fefd94954053f2f4f30c70aecab6ae5a80ef7b7bcbc867
 size 2080
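All four files above are stored through Git LFS, so each diff shows only the pointer file: the LFS spec version, the new object's sha256 oid, and its size in bytes. A small sketch (not part of the commit) of checking a downloaded file against the oid recorded in its pointer:

# Sketch: verify a downloaded LFS object against the sha256 oid from its pointer file.
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# oid taken from the adapter_model.safetensors pointer above
expected = "611da9169e2fcbc35717df9b7b4777d9f26d62cbab3cc2de779aed5eb11c413a"
assert sha256_of("last-checkpoint/adapter_model.safetensors") == expected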
last-checkpoint/trainer_state.json CHANGED

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.3507799804210663,
   "best_model_checkpoint": "miner_id_24/checkpoint-2800",
-  "epoch": 0.
+  "epoch": 0.26489034318881227,
   "eval_steps": 200,
-  "global_step":
+  "global_step": 3400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,

@@ -22543,6 +22543,1414 @@
       "eval_samples_per_second": 5.334,
       "eval_steps_per_second": 2.667,
       "step": 3200
+
},
|
| 22547 |
+
{
|
| 22548 |
+
"epoch": 0.24938646721982002,
|
| 22549 |
+
"grad_norm": 0.5106058716773987,
|
| 22550 |
+
"learning_rate": 0.0001966666750377044,
|
| 22551 |
+
"loss": 0.3128,
|
| 22552 |
+
"step": 3201
|
| 22553 |
+
},
|
| 22554 |
+
{
|
| 22555 |
+
"epoch": 0.24946437614428732,
|
| 22556 |
+
"grad_norm": 0.5966877937316895,
|
| 22557 |
+
"learning_rate": 0.00019666457956191152,
|
| 22558 |
+
"loss": 0.3205,
|
| 22559 |
+
"step": 3202
|
| 22560 |
+
},
|
| 22561 |
+
{
|
| 22562 |
+
"epoch": 0.24954228506875462,
|
| 22563 |
+
"grad_norm": 0.6634540557861328,
|
| 22564 |
+
"learning_rate": 0.00019666249863803387,
|
| 22565 |
+
"loss": 0.3938,
|
| 22566 |
+
"step": 3203
|
| 22567 |
+
},
|
| 22568 |
+
{
|
| 22569 |
+
"epoch": 0.2496201939932219,
|
| 22570 |
+
"grad_norm": 0.7797407507896423,
|
| 22571 |
+
"learning_rate": 0.00019666038861032575,
|
| 22572 |
+
"loss": 0.3575,
|
| 22573 |
+
"step": 3204
|
| 22574 |
+
},
|
| 22575 |
+
{
|
| 22576 |
+
"epoch": 0.2496981029176892,
|
| 22577 |
+
"grad_norm": 1.148941993713379,
|
| 22578 |
+
"learning_rate": 0.0001966583076864481,
|
| 22579 |
+
"loss": 0.3863,
|
| 22580 |
+
"step": 3205
|
| 22581 |
+
},
|
| 22582 |
+
{
|
| 22583 |
+
"epoch": 0.24977601184215653,
|
| 22584 |
+
"grad_norm": 0.5081613063812256,
|
| 22585 |
+
"learning_rate": 0.0001966562122106552,
|
| 22586 |
+
"loss": 0.3303,
|
| 22587 |
+
"step": 3206
|
| 22588 |
+
},
|
| 22589 |
+
{
|
| 22590 |
+
"epoch": 0.24985392076662383,
|
| 22591 |
+
"grad_norm": 0.5668550133705139,
|
| 22592 |
+
"learning_rate": 0.00019665411673486233,
|
| 22593 |
+
"loss": 0.3195,
|
| 22594 |
+
"step": 3207
|
| 22595 |
+
},
|
| 22596 |
+
{
|
| 22597 |
+
"epoch": 0.24993182969109112,
|
| 22598 |
+
"grad_norm": 0.49681487679481506,
|
| 22599 |
+
"learning_rate": 0.00019665202125906944,
|
| 22600 |
+
"loss": 0.3002,
|
| 22601 |
+
"step": 3208
|
| 22602 |
+
},
|
| 22603 |
+
{
|
| 22604 |
+
"epoch": 0.2500097386155584,
|
| 22605 |
+
"grad_norm": 0.7589192986488342,
|
| 22606 |
+
"learning_rate": 0.00019664992578327656,
|
| 22607 |
+
"loss": 0.4274,
|
| 22608 |
+
"step": 3209
|
| 22609 |
+
},
|
| 22610 |
+
{
|
| 22611 |
+
"epoch": 0.2500876475400257,
|
| 22612 |
+
"grad_norm": 0.610198974609375,
|
| 22613 |
+
"learning_rate": 0.00019664783030748367,
|
| 22614 |
+
"loss": 0.2975,
|
| 22615 |
+
"step": 3210
|
| 22616 |
+
},
|
| 22617 |
+
{
|
| 22618 |
+
"epoch": 0.250165556464493,
|
| 22619 |
+
"grad_norm": 0.6626783013343811,
|
| 22620 |
+
"learning_rate": 0.0001966457348316908,
|
| 22621 |
+
"loss": 0.4035,
|
| 22622 |
+
"step": 3211
|
| 22623 |
+
},
|
| 22624 |
+
{
|
| 22625 |
+
"epoch": 0.2502434653889603,
|
| 22626 |
+
"grad_norm": 0.5402490496635437,
|
| 22627 |
+
"learning_rate": 0.0001966436393558979,
|
| 22628 |
+
"loss": 0.4124,
|
| 22629 |
+
"step": 3212
|
| 22630 |
+
},
|
| 22631 |
+
{
|
| 22632 |
+
"epoch": 0.2503213743134276,
|
| 22633 |
+
"grad_norm": 0.5486300587654114,
|
| 22634 |
+
"learning_rate": 0.0001966415293281898,
|
| 22635 |
+
"loss": 0.3389,
|
| 22636 |
+
"step": 3213
|
| 22637 |
+
},
|
| 22638 |
+
{
|
| 22639 |
+
"epoch": 0.2503992832378949,
|
| 22640 |
+
"grad_norm": 0.5691275000572205,
|
| 22641 |
+
"learning_rate": 0.0001966394338523969,
|
| 22642 |
+
"loss": 0.4232,
|
| 22643 |
+
"step": 3214
|
| 22644 |
+
},
|
| 22645 |
+
{
|
| 22646 |
+
"epoch": 0.2504771921623622,
|
| 22647 |
+
"grad_norm": 0.5427938103675842,
|
| 22648 |
+
"learning_rate": 0.00019663733837660402,
|
| 22649 |
+
"loss": 0.3267,
|
| 22650 |
+
"step": 3215
|
| 22651 |
+
},
|
| 22652 |
+
{
|
| 22653 |
+
"epoch": 0.2505551010868295,
|
| 22654 |
+
"grad_norm": 0.7739267945289612,
|
| 22655 |
+
"learning_rate": 0.00019663524290081114,
|
| 22656 |
+
"loss": 0.2278,
|
| 22657 |
+
"step": 3216
|
| 22658 |
+
},
|
| 22659 |
+
{
|
| 22660 |
+
"epoch": 0.2506330100112968,
|
| 22661 |
+
"grad_norm": 0.5968644618988037,
|
| 22662 |
+
"learning_rate": 0.00019663314742501825,
|
| 22663 |
+
"loss": 0.3479,
|
| 22664 |
+
"step": 3217
|
| 22665 |
+
},
|
| 22666 |
+
{
|
| 22667 |
+
"epoch": 0.2507109189357641,
|
| 22668 |
+
"grad_norm": 0.6475822925567627,
|
| 22669 |
+
"learning_rate": 0.00019663103739731014,
|
| 22670 |
+
"loss": 0.3416,
|
| 22671 |
+
"step": 3218
|
| 22672 |
+
},
|
| 22673 |
+
{
|
| 22674 |
+
"epoch": 0.2507888278602314,
|
| 22675 |
+
"grad_norm": 0.7948753237724304,
|
| 22676 |
+
"learning_rate": 0.00019662894192151725,
|
| 22677 |
+
"loss": 0.3808,
|
| 22678 |
+
"step": 3219
|
| 22679 |
+
},
|
| 22680 |
+
{
|
| 22681 |
+
"epoch": 0.2508667367846987,
|
| 22682 |
+
"grad_norm": 0.6238223314285278,
|
| 22683 |
+
"learning_rate": 0.00019662683189380914,
|
| 22684 |
+
"loss": 0.3667,
|
| 22685 |
+
"step": 3220
|
| 22686 |
+
},
|
| 22687 |
+
{
|
| 22688 |
+
"epoch": 0.25094464570916597,
|
| 22689 |
+
"grad_norm": 0.5894230604171753,
|
| 22690 |
+
"learning_rate": 0.00019662472186610103,
|
| 22691 |
+
"loss": 0.3653,
|
| 22692 |
+
"step": 3221
|
| 22693 |
+
},
|
| 22694 |
+
{
|
| 22695 |
+
"epoch": 0.25102255463363327,
|
| 22696 |
+
"grad_norm": 0.6964344382286072,
|
| 22697 |
+
"learning_rate": 0.00019662262639030814,
|
| 22698 |
+
"loss": 0.5578,
|
| 22699 |
+
"step": 3222
|
| 22700 |
+
},
|
| 22701 |
+
{
|
| 22702 |
+
"epoch": 0.25110046355810056,
|
| 22703 |
+
"grad_norm": 0.5642386674880981,
|
| 22704 |
+
"learning_rate": 0.00019662051636260003,
|
| 22705 |
+
"loss": 0.3593,
|
| 22706 |
+
"step": 3223
|
| 22707 |
+
},
|
| 22708 |
+
{
|
| 22709 |
+
"epoch": 0.25117837248256786,
|
| 22710 |
+
"grad_norm": 0.45551422238349915,
|
| 22711 |
+
"learning_rate": 0.00019661842088680714,
|
| 22712 |
+
"loss": 0.3728,
|
| 22713 |
+
"step": 3224
|
| 22714 |
+
},
|
| 22715 |
+
{
|
| 22716 |
+
"epoch": 0.25125628140703515,
|
| 22717 |
+
"grad_norm": 0.5758004784584045,
|
| 22718 |
+
"learning_rate": 0.00019661631085909903,
|
| 22719 |
+
"loss": 0.4128,
|
| 22720 |
+
"step": 3225
|
| 22721 |
+
},
|
| 22722 |
+
{
|
| 22723 |
+
"epoch": 0.25133419033150245,
|
| 22724 |
+
"grad_norm": 0.805318295955658,
|
| 22725 |
+
"learning_rate": 0.00019661420083139092,
|
| 22726 |
+
"loss": 0.3051,
|
| 22727 |
+
"step": 3226
|
| 22728 |
+
},
|
| 22729 |
+
{
|
| 22730 |
+
"epoch": 0.25141209925596975,
|
| 22731 |
+
"grad_norm": 0.6580514907836914,
|
| 22732 |
+
"learning_rate": 0.00019661210535559803,
|
| 22733 |
+
"loss": 0.3211,
|
| 22734 |
+
"step": 3227
|
| 22735 |
+
},
|
| 22736 |
+
{
|
| 22737 |
+
"epoch": 0.2514900081804371,
|
| 22738 |
+
"grad_norm": 0.6343413591384888,
|
| 22739 |
+
"learning_rate": 0.00019660999532788992,
|
| 22740 |
+
"loss": 0.4242,
|
| 22741 |
+
"step": 3228
|
| 22742 |
+
},
|
| 22743 |
+
{
|
| 22744 |
+
"epoch": 0.2515679171049044,
|
| 22745 |
+
"grad_norm": 0.5391520261764526,
|
| 22746 |
+
"learning_rate": 0.0001966078853001818,
|
| 22747 |
+
"loss": 0.4033,
|
| 22748 |
+
"step": 3229
|
| 22749 |
+
},
|
| 22750 |
+
{
|
| 22751 |
+
"epoch": 0.2516458260293717,
|
| 22752 |
+
"grad_norm": 0.40526899695396423,
|
| 22753 |
+
"learning_rate": 0.0001966057752724737,
|
| 22754 |
+
"loss": 0.2415,
|
| 22755 |
+
"step": 3230
|
| 22756 |
+
},
|
| 22757 |
+
{
|
| 22758 |
+
"epoch": 0.251723734953839,
|
| 22759 |
+
"grad_norm": 0.5737383961677551,
|
| 22760 |
+
"learning_rate": 0.00019660366524476558,
|
| 22761 |
+
"loss": 0.2746,
|
| 22762 |
+
"step": 3231
|
| 22763 |
+
},
|
| 22764 |
+
{
|
| 22765 |
+
"epoch": 0.2518016438783063,
|
| 22766 |
+
"grad_norm": 0.6161379218101501,
|
| 22767 |
+
"learning_rate": 0.00019660155521705747,
|
| 22768 |
+
"loss": 0.4734,
|
| 22769 |
+
"step": 3232
|
| 22770 |
+
},
|
| 22771 |
+
{
|
| 22772 |
+
"epoch": 0.2518795528027736,
|
| 22773 |
+
"grad_norm": 0.5806547403335571,
|
| 22774 |
+
"learning_rate": 0.00019659944518934935,
|
| 22775 |
+
"loss": 0.3506,
|
| 22776 |
+
"step": 3233
|
| 22777 |
+
},
|
| 22778 |
+
{
|
| 22779 |
+
"epoch": 0.2519574617272409,
|
| 22780 |
+
"grad_norm": 0.5250337719917297,
|
| 22781 |
+
"learning_rate": 0.00019659733516164124,
|
| 22782 |
+
"loss": 0.3125,
|
| 22783 |
+
"step": 3234
|
| 22784 |
+
},
|
| 22785 |
+
{
|
| 22786 |
+
"epoch": 0.25203537065170817,
|
| 22787 |
+
"grad_norm": 0.5968940258026123,
|
| 22788 |
+
"learning_rate": 0.00019659522513393313,
|
| 22789 |
+
"loss": 0.4744,
|
| 22790 |
+
"step": 3235
|
| 22791 |
+
},
|
| 22792 |
+
{
|
| 22793 |
+
"epoch": 0.25211327957617546,
|
| 22794 |
+
"grad_norm": 0.5862849950790405,
|
| 22795 |
+
"learning_rate": 0.00019659310055430979,
|
| 22796 |
+
"loss": 0.2762,
|
| 22797 |
+
"step": 3236
|
| 22798 |
+
},
|
| 22799 |
+
{
|
| 22800 |
+
"epoch": 0.25219118850064276,
|
| 22801 |
+
"grad_norm": 0.6882526278495789,
|
| 22802 |
+
"learning_rate": 0.00019659099052660167,
|
| 22803 |
+
"loss": 0.4965,
|
| 22804 |
+
"step": 3237
|
| 22805 |
+
},
|
| 22806 |
+
{
|
| 22807 |
+
"epoch": 0.25226909742511006,
|
| 22808 |
+
"grad_norm": 0.6012639999389648,
|
| 22809 |
+
"learning_rate": 0.00019658888049889356,
|
| 22810 |
+
"loss": 0.2848,
|
| 22811 |
+
"step": 3238
|
| 22812 |
+
},
|
| 22813 |
+
{
|
| 22814 |
+
"epoch": 0.25234700634957735,
|
| 22815 |
+
"grad_norm": 0.49253973364830017,
|
| 22816 |
+
"learning_rate": 0.00019658675591927022,
|
| 22817 |
+
"loss": 0.1979,
|
| 22818 |
+
"step": 3239
|
| 22819 |
+
},
|
| 22820 |
+
{
|
| 22821 |
+
"epoch": 0.25242491527404465,
|
| 22822 |
+
"grad_norm": 0.7452335953712463,
|
| 22823 |
+
"learning_rate": 0.0001965846458915621,
|
| 22824 |
+
"loss": 0.426,
|
| 22825 |
+
"step": 3240
|
| 22826 |
+
},
|
| 22827 |
+
{
|
| 22828 |
+
"epoch": 0.25250282419851194,
|
| 22829 |
+
"grad_norm": 0.6414846181869507,
|
| 22830 |
+
"learning_rate": 0.00019658252131193876,
|
| 22831 |
+
"loss": 0.4245,
|
| 22832 |
+
"step": 3241
|
| 22833 |
+
},
|
| 22834 |
+
{
|
| 22835 |
+
"epoch": 0.25258073312297924,
|
| 22836 |
+
"grad_norm": 0.5749356746673584,
|
| 22837 |
+
"learning_rate": 0.00019658041128423065,
|
| 22838 |
+
"loss": 0.2559,
|
| 22839 |
+
"step": 3242
|
| 22840 |
+
},
|
| 22841 |
+
{
|
| 22842 |
+
"epoch": 0.25265864204744654,
|
| 22843 |
+
"grad_norm": 0.5636880993843079,
|
| 22844 |
+
"learning_rate": 0.00019657830125652254,
|
| 22845 |
+
"loss": 0.3648,
|
| 22846 |
+
"step": 3243
|
| 22847 |
+
},
|
| 22848 |
+
{
|
| 22849 |
+
"epoch": 0.25273655097191383,
|
| 22850 |
+
"grad_norm": 0.7396482229232788,
|
| 22851 |
+
"learning_rate": 0.00019657619122881442,
|
| 22852 |
+
"loss": 0.4529,
|
| 22853 |
+
"step": 3244
|
| 22854 |
+
},
|
| 22855 |
+
{
|
| 22856 |
+
"epoch": 0.2528144598963811,
|
| 22857 |
+
"grad_norm": 0.5249337553977966,
|
| 22858 |
+
"learning_rate": 0.00019657406664919108,
|
| 22859 |
+
"loss": 0.2417,
|
| 22860 |
+
"step": 3245
|
| 22861 |
+
},
|
| 22862 |
+
{
|
| 22863 |
+
"epoch": 0.2528923688208484,
|
| 22864 |
+
"grad_norm": 0.5865638256072998,
|
| 22865 |
+
"learning_rate": 0.00019657194206956774,
|
| 22866 |
+
"loss": 0.3282,
|
| 22867 |
+
"step": 3246
|
| 22868 |
+
},
|
| 22869 |
+
{
|
| 22870 |
+
"epoch": 0.2529702777453157,
|
| 22871 |
+
"grad_norm": 0.66961669921875,
|
| 22872 |
+
"learning_rate": 0.0001965698174899444,
|
| 22873 |
+
"loss": 0.3629,
|
| 22874 |
+
"step": 3247
|
| 22875 |
+
},
|
| 22876 |
+
{
|
| 22877 |
+
"epoch": 0.253048186669783,
|
| 22878 |
+
"grad_norm": 0.7234030365943909,
|
| 22879 |
+
"learning_rate": 0.00019656769291032106,
|
| 22880 |
+
"loss": 0.3566,
|
| 22881 |
+
"step": 3248
|
| 22882 |
+
},
|
| 22883 |
+
{
|
| 22884 |
+
"epoch": 0.2531260955942503,
|
| 22885 |
+
"grad_norm": 0.7490926384925842,
|
| 22886 |
+
"learning_rate": 0.00019656556833069772,
|
| 22887 |
+
"loss": 0.4413,
|
| 22888 |
+
"step": 3249
|
| 22889 |
+
},
|
| 22890 |
+
{
|
| 22891 |
+
"epoch": 0.2532040045187176,
|
| 22892 |
+
"grad_norm": 0.6140217781066895,
|
| 22893 |
+
"learning_rate": 0.0001965634583029896,
|
| 22894 |
+
"loss": 0.2728,
|
| 22895 |
+
"step": 3250
|
| 22896 |
+
},
|
| 22897 |
+
{
|
| 22898 |
+
"epoch": 0.2532819134431849,
|
| 22899 |
+
"grad_norm": 0.5819464325904846,
|
| 22900 |
+
"learning_rate": 0.00019656133372336626,
|
| 22901 |
+
"loss": 0.3647,
|
| 22902 |
+
"step": 3251
|
| 22903 |
+
},
|
| 22904 |
+
{
|
| 22905 |
+
"epoch": 0.2533598223676522,
|
| 22906 |
+
"grad_norm": 0.5771917104721069,
|
| 22907 |
+
"learning_rate": 0.00019655920914374292,
|
| 22908 |
+
"loss": 0.3559,
|
| 22909 |
+
"step": 3252
|
| 22910 |
+
},
|
| 22911 |
+
{
|
| 22912 |
+
"epoch": 0.2534377312921195,
|
| 22913 |
+
"grad_norm": 0.5492159724235535,
|
| 22914 |
+
"learning_rate": 0.00019655708456411958,
|
| 22915 |
+
"loss": 0.3492,
|
| 22916 |
+
"step": 3253
|
| 22917 |
+
},
|
| 22918 |
+
{
|
| 22919 |
+
"epoch": 0.2535156402165868,
|
| 22920 |
+
"grad_norm": 0.5098065137863159,
|
| 22921 |
+
"learning_rate": 0.00019655495998449624,
|
| 22922 |
+
"loss": 0.3966,
|
| 22923 |
+
"step": 3254
|
| 22924 |
+
},
|
| 22925 |
+
{
|
| 22926 |
+
"epoch": 0.2535935491410541,
|
| 22927 |
+
"grad_norm": 0.5923775434494019,
|
| 22928 |
+
"learning_rate": 0.00019655284995678812,
|
| 22929 |
+
"loss": 0.3589,
|
| 22930 |
+
"step": 3255
|
| 22931 |
+
},
|
| 22932 |
+
{
|
| 22933 |
+
"epoch": 0.2536714580655214,
|
| 22934 |
+
"grad_norm": 0.46894848346710205,
|
| 22935 |
+
"learning_rate": 0.00019655071082524955,
|
| 22936 |
+
"loss": 0.2666,
|
| 22937 |
+
"step": 3256
|
| 22938 |
+
},
|
| 22939 |
+
{
|
| 22940 |
+
"epoch": 0.2537493669899887,
|
| 22941 |
+
"grad_norm": 0.7138003706932068,
|
| 22942 |
+
"learning_rate": 0.0001965485862456262,
|
| 22943 |
+
"loss": 0.477,
|
| 22944 |
+
"step": 3257
|
| 22945 |
+
},
|
| 22946 |
+
{
|
| 22947 |
+
"epoch": 0.253827275914456,
|
| 22948 |
+
"grad_norm": 0.5192536115646362,
|
| 22949 |
+
"learning_rate": 0.00019654646166600287,
|
| 22950 |
+
"loss": 0.2198,
|
| 22951 |
+
"step": 3258
|
| 22952 |
+
},
|
| 22953 |
+
{
|
| 22954 |
+
"epoch": 0.2539051848389233,
|
| 22955 |
+
"grad_norm": 0.5263598561286926,
|
| 22956 |
+
"learning_rate": 0.00019654433708637953,
|
| 22957 |
+
"loss": 0.3282,
|
| 22958 |
+
"step": 3259
|
| 22959 |
+
},
|
| 22960 |
+
{
|
| 22961 |
+
"epoch": 0.2539830937633906,
|
| 22962 |
+
"grad_norm": 0.8291860222816467,
|
| 22963 |
+
"learning_rate": 0.0001965422125067562,
|
| 22964 |
+
"loss": 0.5265,
|
| 22965 |
+
"step": 3260
|
| 22966 |
+
},
|
| 22967 |
+
{
|
| 22968 |
+
"epoch": 0.2540610026878579,
|
| 22969 |
+
"grad_norm": 0.6136093735694885,
|
| 22970 |
+
"learning_rate": 0.00019654007337521762,
|
| 22971 |
+
"loss": 0.3353,
|
| 22972 |
+
"step": 3261
|
| 22973 |
+
},
|
| 22974 |
+
{
|
| 22975 |
+
"epoch": 0.2541389116123252,
|
| 22976 |
+
"grad_norm": 0.586192786693573,
|
| 22977 |
+
"learning_rate": 0.00019653794879559427,
|
| 22978 |
+
"loss": 0.4237,
|
| 22979 |
+
"step": 3262
|
| 22980 |
+
},
|
| 22981 |
+
{
|
| 22982 |
+
"epoch": 0.2542168205367925,
|
| 22983 |
+
"grad_norm": 0.4203140139579773,
|
| 22984 |
+
"learning_rate": 0.00019653582421597093,
|
| 22985 |
+
"loss": 0.2814,
|
| 22986 |
+
"step": 3263
|
| 22987 |
+
},
|
| 22988 |
+
{
|
| 22989 |
+
"epoch": 0.2542947294612598,
|
| 22990 |
+
"grad_norm": 0.5302475690841675,
|
| 22991 |
+
"learning_rate": 0.00019653368508443236,
|
| 22992 |
+
"loss": 0.304,
|
| 22993 |
+
"step": 3264
|
| 22994 |
+
},
|
| 22995 |
+
{
|
| 22996 |
+
"epoch": 0.2543726383857271,
|
| 22997 |
+
"grad_norm": 0.5462174415588379,
|
| 22998 |
+
"learning_rate": 0.0001965315459528938,
|
| 22999 |
+
"loss": 0.3875,
|
| 23000 |
+
"step": 3265
|
| 23001 |
+
},
|
| 23002 |
+
{
|
| 23003 |
+
"epoch": 0.2544505473101944,
|
| 23004 |
+
"grad_norm": 0.8253766894340515,
|
| 23005 |
+
"learning_rate": 0.00019652942137327045,
|
| 23006 |
+
"loss": 0.4326,
|
| 23007 |
+
"step": 3266
|
| 23008 |
+
},
|
| 23009 |
+
{
|
| 23010 |
+
"epoch": 0.2545284562346617,
|
| 23011 |
+
"grad_norm": 0.5205588936805725,
|
| 23012 |
+
"learning_rate": 0.00019652728224173188,
|
| 23013 |
+
"loss": 0.2916,
|
| 23014 |
+
"step": 3267
|
| 23015 |
+
},
|
| 23016 |
+
{
|
| 23017 |
+
"epoch": 0.254606365159129,
|
| 23018 |
+
"grad_norm": 0.6479153633117676,
|
| 23019 |
+
"learning_rate": 0.00019652515766210854,
|
| 23020 |
+
"loss": 0.3559,
|
| 23021 |
+
"step": 3268
|
| 23022 |
+
},
|
| 23023 |
+
{
|
| 23024 |
+
"epoch": 0.2546842740835963,
|
| 23025 |
+
"grad_norm": 0.49056729674339294,
|
| 23026 |
+
"learning_rate": 0.00019652301853056997,
|
| 23027 |
+
"loss": 0.2645,
|
| 23028 |
+
"step": 3269
|
| 23029 |
+
},
|
| 23030 |
+
{
|
| 23031 |
+
"epoch": 0.2547621830080636,
|
| 23032 |
+
"grad_norm": 0.5412452220916748,
|
| 23033 |
+
"learning_rate": 0.0001965208793990314,
|
| 23034 |
+
"loss": 0.415,
|
| 23035 |
+
"step": 3270
|
| 23036 |
+
},
|
| 23037 |
+
{
|
| 23038 |
+
"epoch": 0.2548400919325309,
|
| 23039 |
+
"grad_norm": 0.8292869329452515,
|
| 23040 |
+
"learning_rate": 0.00019651875481940806,
|
| 23041 |
+
"loss": 0.3987,
|
| 23042 |
+
"step": 3271
|
| 23043 |
+
},
|
| 23044 |
+
{
|
| 23045 |
+
"epoch": 0.2549180008569982,
|
| 23046 |
+
"grad_norm": 0.7451716065406799,
|
| 23047 |
+
"learning_rate": 0.0001965166156878695,
|
| 23048 |
+
"loss": 0.5601,
|
| 23049 |
+
"step": 3272
|
| 23050 |
+
},
|
| 23051 |
+
{
|
| 23052 |
+
"epoch": 0.25499590978146547,
|
| 23053 |
+
"grad_norm": 0.5898613333702087,
|
| 23054 |
+
"learning_rate": 0.00019651447655633092,
|
| 23055 |
+
"loss": 0.5784,
|
| 23056 |
+
"step": 3273
|
| 23057 |
+
},
|
| 23058 |
+
{
|
| 23059 |
+
"epoch": 0.25507381870593276,
|
| 23060 |
+
"grad_norm": 0.5934725999832153,
|
| 23061 |
+
"learning_rate": 0.00019651233742479235,
|
| 23062 |
+
"loss": 0.3467,
|
| 23063 |
+
"step": 3274
|
| 23064 |
+
},
|
| 23065 |
+
{
|
| 23066 |
+
"epoch": 0.25515172763040006,
|
| 23067 |
+
"grad_norm": 0.5787108540534973,
|
| 23068 |
+
"learning_rate": 0.00019651019829325378,
|
| 23069 |
+
"loss": 0.3488,
|
| 23070 |
+
"step": 3275
|
| 23071 |
+
},
|
| 23072 |
+
{
|
| 23073 |
+
"epoch": 0.25522963655486736,
|
| 23074 |
+
"grad_norm": 0.6317718625068665,
|
| 23075 |
+
"learning_rate": 0.00019650807371363044,
|
| 23076 |
+
"loss": 0.4192,
|
| 23077 |
+
"step": 3276
|
| 23078 |
+
},
|
| 23079 |
+
{
|
| 23080 |
+
"epoch": 0.25530754547933465,
|
| 23081 |
+
"grad_norm": 0.7298019528388977,
|
| 23082 |
+
"learning_rate": 0.00019650592003017664,
|
| 23083 |
+
"loss": 0.4621,
|
| 23084 |
+
"step": 3277
|
| 23085 |
+
},
|
| 23086 |
+
{
|
| 23087 |
+
"epoch": 0.25538545440380195,
|
| 23088 |
+
"grad_norm": 0.666454553604126,
|
| 23089 |
+
"learning_rate": 0.0001965037954505533,
|
| 23090 |
+
"loss": 0.3894,
|
| 23091 |
+
"step": 3278
|
| 23092 |
+
},
|
| 23093 |
+
{
|
| 23094 |
+
"epoch": 0.25546336332826924,
|
| 23095 |
+
"grad_norm": 0.5220448970794678,
|
| 23096 |
+
"learning_rate": 0.0001965016417670995,
|
| 23097 |
+
"loss": 0.3193,
|
| 23098 |
+
"step": 3279
|
| 23099 |
+
},
|
| 23100 |
+
{
|
| 23101 |
+
"epoch": 0.25554127225273654,
|
| 23102 |
+
"grad_norm": 0.6479876041412354,
|
| 23103 |
+
"learning_rate": 0.00019649950263556093,
|
| 23104 |
+
"loss": 0.4044,
|
| 23105 |
+
"step": 3280
|
| 23106 |
+
},
|
| 23107 |
+
{
|
| 23108 |
+
"epoch": 0.25561918117720384,
|
| 23109 |
+
"grad_norm": 0.48965969681739807,
|
| 23110 |
+
"learning_rate": 0.00019649736350402236,
|
| 23111 |
+
"loss": 0.2473,
|
| 23112 |
+
"step": 3281
|
| 23113 |
+
},
|
| 23114 |
+
{
|
| 23115 |
+
"epoch": 0.25569709010167113,
|
| 23116 |
+
"grad_norm": 0.7388249635696411,
|
| 23117 |
+
"learning_rate": 0.0001964952243724838,
|
| 23118 |
+
"loss": 0.3602,
|
| 23119 |
+
"step": 3282
|
| 23120 |
+
},
|
| 23121 |
+
{
|
| 23122 |
+
"epoch": 0.2557749990261384,
|
| 23123 |
+
"grad_norm": 0.7653477191925049,
|
| 23124 |
+
"learning_rate": 0.00019649307068903,
|
| 23125 |
+
"loss": 0.4901,
|
| 23126 |
+
"step": 3283
|
| 23127 |
+
},
|
| 23128 |
+
{
|
| 23129 |
+
"epoch": 0.2558529079506057,
|
| 23130 |
+
"grad_norm": 0.5637389421463013,
|
| 23131 |
+
"learning_rate": 0.00019649093155749142,
|
| 23132 |
+
"loss": 0.2566,
|
| 23133 |
+
"step": 3284
|
| 23134 |
+
},
|
| 23135 |
+
{
|
| 23136 |
+
"epoch": 0.255930816875073,
|
| 23137 |
+
"grad_norm": 0.6961542963981628,
|
| 23138 |
+
"learning_rate": 0.00019648879242595285,
|
| 23139 |
+
"loss": 0.54,
|
| 23140 |
+
"step": 3285
|
| 23141 |
+
},
|
| 23142 |
+
{
|
| 23143 |
+
"epoch": 0.2560087257995403,
|
| 23144 |
+
"grad_norm": 0.7896203994750977,
|
| 23145 |
+
"learning_rate": 0.00019648665329441428,
|
| 23146 |
+
"loss": 0.3297,
|
| 23147 |
+
"step": 3286
|
| 23148 |
+
},
|
| 23149 |
+
{
|
| 23150 |
+
"epoch": 0.2560866347240076,
|
| 23151 |
+
"grad_norm": 0.6780399680137634,
|
| 23152 |
+
"learning_rate": 0.00019648449961096048,
|
| 23153 |
+
"loss": 0.2961,
|
| 23154 |
+
"step": 3287
|
| 23155 |
+
},
|
| 23156 |
+
{
|
| 23157 |
+
"epoch": 0.2561645436484749,
|
| 23158 |
+
"grad_norm": 0.5588671565055847,
|
| 23159 |
+
"learning_rate": 0.00019648236047942191,
|
| 23160 |
+
"loss": 0.3713,
|
| 23161 |
+
"step": 3288
|
| 23162 |
+
},
|
| 23163 |
+
{
|
| 23164 |
+
"epoch": 0.2562424525729422,
|
| 23165 |
+
"grad_norm": 0.5449357032775879,
|
| 23166 |
+
"learning_rate": 0.00019648020679596812,
|
| 23167 |
+
"loss": 0.3048,
|
| 23168 |
+
"step": 3289
|
| 23169 |
+
},
|
| 23170 |
+
{
|
| 23171 |
+
"epoch": 0.25632036149740955,
|
| 23172 |
+
"grad_norm": 0.5603000521659851,
|
| 23173 |
+
"learning_rate": 0.00019647805311251432,
|
| 23174 |
+
"loss": 0.2669,
|
| 23175 |
+
"step": 3290
|
| 23176 |
+
},
|
| 23177 |
+
{
|
| 23178 |
+
"epoch": 0.25639827042187685,
|
| 23179 |
+
"grad_norm": 0.6949597001075745,
|
| 23180 |
+
"learning_rate": 0.00019647589942906052,
|
| 23181 |
+
"loss": 0.336,
|
| 23182 |
+
"step": 3291
|
| 23183 |
+
},
|
| 23184 |
+
{
|
| 23185 |
+
"epoch": 0.25647617934634415,
|
| 23186 |
+
"grad_norm": 0.5442857146263123,
|
| 23187 |
+
"learning_rate": 0.00019647376029752195,
|
| 23188 |
+
"loss": 0.2353,
|
| 23189 |
+
"step": 3292
|
| 23190 |
+
},
|
| 23191 |
+
{
|
| 23192 |
+
"epoch": 0.25655408827081144,
|
| 23193 |
+
"grad_norm": 0.6836625337600708,
|
| 23194 |
+
"learning_rate": 0.00019647162116598338,
|
| 23195 |
+
"loss": 0.5784,
|
| 23196 |
+
"step": 3293
|
| 23197 |
+
},
|
| 23198 |
+
{
|
| 23199 |
+
"epoch": 0.25663199719527874,
|
| 23200 |
+
"grad_norm": 0.7158687114715576,
|
| 23201 |
+
"learning_rate": 0.00019646945293061435,
|
| 23202 |
+
"loss": 0.3767,
|
| 23203 |
+
"step": 3294
|
| 23204 |
+
},
|
| 23205 |
+
{
|
| 23206 |
+
"epoch": 0.25670990611974603,
|
| 23207 |
+
"grad_norm": 0.5451361536979675,
|
| 23208 |
+
"learning_rate": 0.00019646731379907578,
|
| 23209 |
+
"loss": 0.2799,
|
| 23210 |
+
"step": 3295
|
| 23211 |
+
},
|
| 23212 |
+
{
|
| 23213 |
+
"epoch": 0.25678781504421333,
|
| 23214 |
+
"grad_norm": 0.5864826440811157,
|
| 23215 |
+
"learning_rate": 0.00019646516011562198,
|
| 23216 |
+
"loss": 0.4114,
|
| 23217 |
+
"step": 3296
|
| 23218 |
+
},
|
| 23219 |
+
{
|
| 23220 |
+
"epoch": 0.2568657239686806,
|
| 23221 |
+
"grad_norm": 0.663476288318634,
|
| 23222 |
+
"learning_rate": 0.00019646300643216819,
|
| 23223 |
+
"loss": 0.2973,
|
| 23224 |
+
"step": 3297
|
| 23225 |
+
},
|
| 23226 |
+
{
|
| 23227 |
+
"epoch": 0.2569436328931479,
|
| 23228 |
+
"grad_norm": 0.7992104291915894,
|
| 23229 |
+
"learning_rate": 0.0001964608527487144,
|
| 23230 |
+
"loss": 0.4214,
|
| 23231 |
+
"step": 3298
|
| 23232 |
+
},
|
| 23233 |
+
{
|
| 23234 |
+
"epoch": 0.2570215418176152,
|
| 23235 |
+
"grad_norm": 0.5071900486946106,
|
| 23236 |
+
"learning_rate": 0.0001964586990652606,
|
| 23237 |
+
"loss": 0.3133,
|
| 23238 |
+
"step": 3299
|
| 23239 |
+
},
|
| 23240 |
+
{
|
| 23241 |
+
"epoch": 0.2570994507420825,
|
| 23242 |
+
"grad_norm": 0.6618044376373291,
|
| 23243 |
+
"learning_rate": 0.0001964565453818068,
|
| 23244 |
+
"loss": 0.2752,
|
| 23245 |
+
"step": 3300
|
| 23246 |
+
},
|
| 23247 |
+
{
|
| 23248 |
+
"epoch": 0.2571773596665498,
|
| 23249 |
+
"grad_norm": 0.769734799861908,
|
| 23250 |
+
"learning_rate": 0.000196454391698353,
|
| 23251 |
+
"loss": 0.4394,
|
| 23252 |
+
"step": 3301
|
| 23253 |
+
},
|
| 23254 |
+
{
|
| 23255 |
+
"epoch": 0.2572552685910171,
|
| 23256 |
+
"grad_norm": 0.6918005347251892,
|
| 23257 |
+
"learning_rate": 0.0001964522380148992,
|
| 23258 |
+
"loss": 0.319,
|
| 23259 |
+
"step": 3302
|
| 23260 |
+
},
|
| 23261 |
+
{
|
| 23262 |
+
"epoch": 0.2573331775154844,
|
| 23263 |
+
"grad_norm": 0.5556265115737915,
|
| 23264 |
+
"learning_rate": 0.00019645006977953017,
|
| 23265 |
+
"loss": 0.2933,
|
| 23266 |
+
"step": 3303
|
| 23267 |
+
},
|
| 23268 |
+
{
|
| 23269 |
+
"epoch": 0.2574110864399517,
|
| 23270 |
+
"grad_norm": 0.6277108192443848,
|
| 23271 |
+
"learning_rate": 0.0001964479306479916,
|
| 23272 |
+
"loss": 0.4444,
|
| 23273 |
+
"step": 3304
|
| 23274 |
+
},
|
| 23275 |
+
{
|
| 23276 |
+
"epoch": 0.257488995364419,
|
| 23277 |
+
"grad_norm": 0.5672405362129211,
|
| 23278 |
+
"learning_rate": 0.0001964457769645378,
|
| 23279 |
+
"loss": 0.3481,
|
| 23280 |
+
"step": 3305
|
| 23281 |
+
},
|
| 23282 |
+
{
|
| 23283 |
+
"epoch": 0.2575669042888863,
|
| 23284 |
+
"grad_norm": 0.6121922731399536,
|
| 23285 |
+
"learning_rate": 0.00019644360872916877,
|
| 23286 |
+
"loss": 0.3701,
|
| 23287 |
+
"step": 3306
|
| 23288 |
+
},
|
| 23289 |
+
{
|
| 23290 |
+
"epoch": 0.2576448132133536,
|
| 23291 |
+
"grad_norm": 0.6586460471153259,
|
| 23292 |
+
"learning_rate": 0.00019644144049379975,
|
| 23293 |
+
"loss": 0.4458,
|
| 23294 |
+
"step": 3307
|
| 23295 |
+
},
|
| 23296 |
+
{
|
| 23297 |
+
"epoch": 0.2577227221378209,
|
| 23298 |
+
"grad_norm": 0.5649852156639099,
|
| 23299 |
+
"learning_rate": 0.00019643928681034595,
|
| 23300 |
+
"loss": 0.3925,
|
| 23301 |
+
"step": 3308
|
| 23302 |
+
},
|
| 23303 |
+
{
|
| 23304 |
+
"epoch": 0.2578006310622882,
|
| 23305 |
+
"grad_norm": 0.5628907680511475,
|
| 23306 |
+
"learning_rate": 0.00019643713312689215,
|
| 23307 |
+
"loss": 0.3512,
|
| 23308 |
+
"step": 3309
|
| 23309 |
+
},
|
| 23310 |
+
{
|
| 23311 |
+
"epoch": 0.2578785399867555,
|
| 23312 |
+
"grad_norm": 0.44619470834732056,
|
| 23313 |
+
"learning_rate": 0.00019643496489152312,
|
| 23314 |
+
"loss": 0.4045,
|
| 23315 |
+
"step": 3310
|
| 23316 |
+
},
|
| 23317 |
+
{
|
| 23318 |
+
"epoch": 0.25795644891122277,
|
| 23319 |
+
"grad_norm": 0.683813750743866,
|
| 23320 |
+
"learning_rate": 0.00019643281120806932,
|
| 23321 |
+
"loss": 0.3384,
|
| 23322 |
+
"step": 3311
|
| 23323 |
+
},
|
| 23324 |
+
{
|
| 23325 |
+
"epoch": 0.25803435783569006,
|
| 23326 |
+
"grad_norm": 0.6961197257041931,
|
| 23327 |
+
"learning_rate": 0.0001964306429727003,
|
| 23328 |
+
"loss": 0.4114,
|
| 23329 |
+
"step": 3312
|
| 23330 |
+
},
|
| 23331 |
+
{
|
| 23332 |
+
"epoch": 0.25811226676015736,
|
| 23333 |
+
"grad_norm": 0.46807223558425903,
|
| 23334 |
+
"learning_rate": 0.0001964284892892465,
|
| 23335 |
+
"loss": 0.2694,
|
| 23336 |
+
"step": 3313
|
| 23337 |
+
},
|
| 23338 |
+
{
|
| 23339 |
+
"epoch": 0.25819017568462466,
|
| 23340 |
+
"grad_norm": 0.5744104981422424,
|
| 23341 |
+
"learning_rate": 0.00019642632105387747,
|
| 23342 |
+
"loss": 0.243,
|
| 23343 |
+
"step": 3314
|
| 23344 |
+
},
|
| 23345 |
+
{
|
| 23346 |
+
"epoch": 0.25826808460909195,
|
| 23347 |
+
"grad_norm": 0.5456708073616028,
|
| 23348 |
+
"learning_rate": 0.00019642415281850845,
|
| 23349 |
+
"loss": 0.3609,
|
| 23350 |
+
"step": 3315
|
| 23351 |
+
},
|
| 23352 |
+
{
|
| 23353 |
+
"epoch": 0.25834599353355925,
|
| 23354 |
+
"grad_norm": 0.6204336285591125,
|
| 23355 |
+
"learning_rate": 0.00019642198458313942,
|
| 23356 |
+
"loss": 0.4311,
|
| 23357 |
+
"step": 3316
|
| 23358 |
+
},
|
| 23359 |
+
{
|
| 23360 |
+
"epoch": 0.25842390245802654,
|
| 23361 |
+
"grad_norm": 0.5026257634162903,
|
| 23362 |
+
"learning_rate": 0.00019641983089968562,
|
| 23363 |
+
"loss": 0.3669,
|
| 23364 |
+
"step": 3317
|
| 23365 |
+
},
|
| 23366 |
+
{
|
| 23367 |
+
"epoch": 0.25850181138249384,
|
| 23368 |
+
"grad_norm": 0.5051338076591492,
|
| 23369 |
+
"learning_rate": 0.0001964176626643166,
|
| 23370 |
+
"loss": 0.333,
|
| 23371 |
+
"step": 3318
|
| 23372 |
+
},
|
| 23373 |
+
{
|
| 23374 |
+
"epoch": 0.25857972030696114,
|
| 23375 |
+
"grad_norm": 0.5692809224128723,
|
| 23376 |
+
"learning_rate": 0.00019641549442894757,
|
| 23377 |
+
"loss": 0.3202,
|
| 23378 |
+
"step": 3319
|
| 23379 |
+
},
|
| 23380 |
+
{
|
| 23381 |
+
"epoch": 0.2586576292314285,
|
| 23382 |
+
"grad_norm": 0.5144463181495667,
|
| 23383 |
+
"learning_rate": 0.00019641332619357854,
|
| 23384 |
+
"loss": 0.216,
|
| 23385 |
+
"step": 3320
|
| 23386 |
+
},
|
| 23387 |
+
{
|
| 23388 |
+
"epoch": 0.2587355381558958,
|
| 23389 |
+
"grad_norm": 0.6410982608795166,
|
| 23390 |
+
"learning_rate": 0.00019641115795820951,
|
| 23391 |
+
"loss": 0.4149,
|
| 23392 |
+
"step": 3321
|
| 23393 |
+
},
|
| 23394 |
+
{
|
| 23395 |
+
"epoch": 0.2588134470803631,
|
| 23396 |
+
"grad_norm": 0.5964701771736145,
|
| 23397 |
+
"learning_rate": 0.0001964089897228405,
|
| 23398 |
+
"loss": 0.3277,
|
| 23399 |
+
"step": 3322
|
| 23400 |
+
},
|
| 23401 |
+
{
|
| 23402 |
+
"epoch": 0.2588913560048304,
|
| 23403 |
+
"grad_norm": 0.674593448638916,
|
| 23404 |
+
"learning_rate": 0.00019640682148747146,
|
| 23405 |
+
"loss": 0.4456,
|
| 23406 |
+
"step": 3323
|
| 23407 |
+
},
|
| 23408 |
+
{
|
| 23409 |
+
"epoch": 0.25896926492929767,
|
| 23410 |
+
"grad_norm": 0.9825165271759033,
|
| 23411 |
+
"learning_rate": 0.00019640465325210243,
|
| 23412 |
+
"loss": 0.4399,
|
| 23413 |
+
"step": 3324
|
| 23414 |
+
},
|
| 23415 |
+
{
|
| 23416 |
+
"epoch": 0.25904717385376497,
|
| 23417 |
+
"grad_norm": 0.8398321866989136,
|
| 23418 |
+
"learning_rate": 0.0001964024850167334,
|
| 23419 |
+
"loss": 0.3054,
|
| 23420 |
+
"step": 3325
|
| 23421 |
+
},
|
| 23422 |
+
{
|
| 23423 |
+
"epoch": 0.25912508277823226,
|
| 23424 |
+
"grad_norm": 0.5899589657783508,
|
| 23425 |
+
"learning_rate": 0.00019640031678136438,
|
| 23426 |
+
"loss": 0.3493,
|
| 23427 |
+
"step": 3326
|
| 23428 |
+
},
|
| 23429 |
+
{
|
| 23430 |
+
"epoch": 0.25920299170269956,
|
| 23431 |
+
"grad_norm": 0.6801294684410095,
|
| 23432 |
+
"learning_rate": 0.00019639814854599535,
|
| 23433 |
+
"loss": 0.3637,
|
| 23434 |
+
"step": 3327
|
| 23435 |
+
},
|
| 23436 |
+
{
|
| 23437 |
+
"epoch": 0.25928090062716685,
|
| 23438 |
+
"grad_norm": 0.6567696928977966,
|
| 23439 |
+
"learning_rate": 0.00019639598031062633,
|
| 23440 |
+
"loss": 0.3332,
|
| 23441 |
+
"step": 3328
|
| 23442 |
+
},
|
| 23443 |
+
{
|
| 23444 |
+
"epoch": 0.25935880955163415,
|
| 23445 |
+
"grad_norm": 0.48817339539527893,
|
| 23446 |
+
"learning_rate": 0.00019639379752334207,
|
| 23447 |
+
"loss": 0.3155,
|
| 23448 |
+
"step": 3329
|
| 23449 |
+
},
|
| 23450 |
+
{
|
| 23451 |
+
"epoch": 0.25943671847610145,
|
| 23452 |
+
"grad_norm": 0.6027638912200928,
|
| 23453 |
+
"learning_rate": 0.00019639162928797305,
|
| 23454 |
+
"loss": 0.2818,
|
| 23455 |
+
"step": 3330
|
| 23456 |
+
},
|
| 23457 |
+
{
|
| 23458 |
+
"epoch": 0.25951462740056874,
|
| 23459 |
+
"grad_norm": 0.7377107739448547,
|
| 23460 |
+
"learning_rate": 0.00019638946105260402,
|
| 23461 |
+
"loss": 0.3022,
|
| 23462 |
+
"step": 3331
|
| 23463 |
+
},
|
| 23464 |
+
{
|
| 23465 |
+
"epoch": 0.25959253632503604,
|
| 23466 |
+
"grad_norm": 0.594479501247406,
|
| 23467 |
+
"learning_rate": 0.00019638727826531976,
|
| 23468 |
+
"loss": 0.3125,
|
| 23469 |
+
"step": 3332
|
| 23470 |
+
},
|
| 23471 |
+
{
|
| 23472 |
+
"epoch": 0.25967044524950333,
|
| 23473 |
+
"grad_norm": 0.5130565762519836,
|
| 23474 |
+
"learning_rate": 0.00019638511002995074,
|
| 23475 |
+
"loss": 0.2997,
|
| 23476 |
+
"step": 3333
|
| 23477 |
+
},
|
| 23478 |
+
{
|
| 23479 |
+
"epoch": 0.25974835417397063,
|
| 23480 |
+
"grad_norm": 0.7239642143249512,
|
| 23481 |
+
"learning_rate": 0.00019638292724266648,
|
| 23482 |
+
"loss": 0.4071,
|
| 23483 |
+
"step": 3334
|
| 23484 |
+
},
|
| 23485 |
+
{
|
| 23486 |
+
"epoch": 0.2598262630984379,
|
| 23487 |
+
"grad_norm": 0.47540801763534546,
|
| 23488 |
+
"learning_rate": 0.00019638075900729746,
|
| 23489 |
+
"loss": 0.3762,
|
| 23490 |
+
"step": 3335
|
| 23491 |
+
},
|
| 23492 |
+
{
|
| 23493 |
+
"epoch": 0.2599041720229052,
|
| 23494 |
+
"grad_norm": 0.5777168273925781,
|
| 23495 |
+
"learning_rate": 0.0001963785762200132,
|
| 23496 |
+
"loss": 0.3677,
|
| 23497 |
+
"step": 3336
|
| 23498 |
+
},
|
| 23499 |
+
{
|
| 23500 |
+
"epoch": 0.2599820809473725,
|
| 23501 |
+
"grad_norm": 0.5683912038803101,
|
| 23502 |
+
"learning_rate": 0.00019637639343272895,
|
| 23503 |
+
"loss": 0.3952,
|
| 23504 |
+
"step": 3337
|
| 23505 |
+
},
|
| 23506 |
+
{
|
| 23507 |
+
"epoch": 0.2600599898718398,
|
| 23508 |
+
"grad_norm": 0.6080047488212585,
|
| 23509 |
+
"learning_rate": 0.00019637422519735992,
|
| 23510 |
+
"loss": 0.2784,
|
| 23511 |
+
"step": 3338
|
| 23512 |
+
},
|
| 23513 |
+
{
|
| 23514 |
+
"epoch": 0.2601378987963071,
|
| 23515 |
+
"grad_norm": 0.6565389633178711,
|
| 23516 |
+
"learning_rate": 0.00019637204241007566,
|
| 23517 |
+
"loss": 0.3228,
|
| 23518 |
+
"step": 3339
|
| 23519 |
+
},
|
| 23520 |
+
{
|
| 23521 |
+
"epoch": 0.2602158077207744,
|
| 23522 |
+
"grad_norm": 0.5479505658149719,
|
| 23523 |
+
"learning_rate": 0.0001963698596227914,
|
| 23524 |
+
"loss": 0.2677,
|
| 23525 |
+
"step": 3340
|
| 23526 |
+
},
|
| 23527 |
+
{
|
| 23528 |
+
"epoch": 0.2602937166452417,
|
| 23529 |
+
"grad_norm": 0.6100160479545593,
|
| 23530 |
+
"learning_rate": 0.00019636767683550715,
|
| 23531 |
+
"loss": 0.2961,
|
| 23532 |
+
"step": 3341
|
| 23533 |
+
},
|
| 23534 |
+
{
|
| 23535 |
+
"epoch": 0.260371625569709,
|
| 23536 |
+
"grad_norm": 0.41502997279167175,
|
| 23537 |
+
"learning_rate": 0.0001963654940482229,
|
| 23538 |
+
"loss": 0.1952,
|
| 23539 |
+
"step": 3342
|
| 23540 |
+
},
|
| 23541 |
+
{
|
| 23542 |
+
"epoch": 0.2604495344941763,
|
| 23543 |
+
"grad_norm": 0.644861102104187,
|
| 23544 |
+
"learning_rate": 0.00019636331126093864,
|
| 23545 |
+
"loss": 0.3028,
|
| 23546 |
+
"step": 3343
|
| 23547 |
+
},
|
| 23548 |
+
{
|
| 23549 |
+
"epoch": 0.2605274434186436,
|
| 23550 |
+
"grad_norm": 0.5978960394859314,
|
| 23551 |
+
"learning_rate": 0.0001963611284736544,
|
| 23552 |
+
"loss": 0.2066,
|
| 23553 |
+
"step": 3344
|
| 23554 |
+
},
|
| 23555 |
+
{
|
| 23556 |
+
"epoch": 0.2606053523431109,
|
| 23557 |
+
"grad_norm": 0.7081514000892639,
|
| 23558 |
+
"learning_rate": 0.00019635894568637013,
|
| 23559 |
+
"loss": 0.3392,
|
| 23560 |
+
"step": 3345
|
| 23561 |
+
},
|
| 23562 |
+
{
|
| 23563 |
+
"epoch": 0.2606832612675782,
|
| 23564 |
+
"grad_norm": 0.7809303998947144,
|
| 23565 |
+
"learning_rate": 0.00019635676289908588,
|
| 23566 |
+
"loss": 0.6052,
|
| 23567 |
+
"step": 3346
|
| 23568 |
+
},
|
| 23569 |
+
{
|
| 23570 |
+
"epoch": 0.2607611701920455,
|
| 23571 |
+
"grad_norm": 0.6267102956771851,
|
| 23572 |
+
"learning_rate": 0.00019635458011180162,
|
| 23573 |
+
"loss": 0.2694,
|
| 23574 |
+
"step": 3347
|
| 23575 |
+
},
|
| 23576 |
+
{
|
| 23577 |
+
"epoch": 0.2608390791165128,
|
| 23578 |
+
"grad_norm": 0.8897649645805359,
|
| 23579 |
+
"learning_rate": 0.00019635239732451737,
|
| 23580 |
+
"loss": 0.4096,
|
| 23581 |
+
"step": 3348
|
| 23582 |
+
},
|
| 23583 |
+
{
|
| 23584 |
+
"epoch": 0.26091698804098007,
|
| 23585 |
+
"grad_norm": 0.6581563949584961,
|
| 23586 |
+
"learning_rate": 0.00019635021453723311,
|
| 23587 |
+
"loss": 0.3789,
|
| 23588 |
+
"step": 3349
|
| 23589 |
+
},
|
| 23590 |
+
{
|
| 23591 |
+
"epoch": 0.26099489696544736,
|
| 23592 |
+
"grad_norm": 0.631004273891449,
|
| 23593 |
+
"learning_rate": 0.00019634801719803363,
|
| 23594 |
+
"loss": 0.3855,
|
| 23595 |
+
"step": 3350
|
| 23596 |
+
},
|
| 23597 |
+
{
|
| 23598 |
+
"epoch": 0.2610728058899147,
|
| 23599 |
+
"grad_norm": 0.7378699779510498,
|
| 23600 |
+
"learning_rate": 0.00019634583441074938,
|
| 23601 |
+
"loss": 0.4598,
|
| 23602 |
+
"step": 3351
|
| 23603 |
+
},
|
| 23604 |
+
{
|
| 23605 |
+
"epoch": 0.261150714814382,
|
| 23606 |
+
"grad_norm": 0.5705403089523315,
|
| 23607 |
+
"learning_rate": 0.0001963436370715499,
|
| 23608 |
+
"loss": 0.3988,
|
| 23609 |
+
"step": 3352
|
| 23610 |
+
},
|
| 23611 |
+
{
|
| 23612 |
+
"epoch": 0.2612286237388493,
|
| 23613 |
+
"grad_norm": 0.4680072069168091,
|
| 23614 |
+
"learning_rate": 0.00019634146883618087,
|
| 23615 |
+
"loss": 0.1802,
|
| 23616 |
+
"step": 3353
|
| 23617 |
+
},
|
| 23618 |
+
{
|
| 23619 |
+
"epoch": 0.2613065326633166,
|
| 23620 |
+
"grad_norm": 0.6187443137168884,
|
| 23621 |
+
"learning_rate": 0.00019633927149698138,
|
| 23622 |
+
"loss": 0.2908,
|
| 23623 |
+
"step": 3354
|
| 23624 |
+
},
|
| 23625 |
+
{
|
| 23626 |
+
"epoch": 0.2613844415877839,
|
| 23627 |
+
"grad_norm": 0.7931640148162842,
|
| 23628 |
+
"learning_rate": 0.00019633708870969713,
|
| 23629 |
+
"loss": 0.4098,
|
| 23630 |
+
"step": 3355
|
| 23631 |
+
},
|
| 23632 |
+
{
|
| 23633 |
+
"epoch": 0.2614623505122512,
|
| 23634 |
+
"grad_norm": 0.6151863932609558,
|
| 23635 |
+
"learning_rate": 0.00019633489137049764,
|
| 23636 |
+
"loss": 0.5057,
|
| 23637 |
+
"step": 3356
|
| 23638 |
+
},
|
| 23639 |
+
{
|
| 23640 |
+
"epoch": 0.2615402594367185,
|
| 23641 |
+
"grad_norm": 0.7180975079536438,
|
| 23642 |
+
"learning_rate": 0.00019633269403129816,
|
| 23643 |
+
"loss": 0.5605,
|
| 23644 |
+
"step": 3357
|
| 23645 |
+
},
|
| 23646 |
+
{
|
| 23647 |
+
"epoch": 0.2616181683611858,
|
| 23648 |
+
"grad_norm": 0.7103408575057983,
|
| 23649 |
+
"learning_rate": 0.0001963305112440139,
|
| 23650 |
+
"loss": 0.413,
|
| 23651 |
+
"step": 3358
|
| 23652 |
+
},
|
| 23653 |
+
{
|
| 23654 |
+
"epoch": 0.2616960772856531,
|
| 23655 |
+
"grad_norm": 0.6150344610214233,
|
| 23656 |
+
"learning_rate": 0.00019632831390481442,
|
| 23657 |
+
"loss": 0.3392,
|
| 23658 |
+
"step": 3359
|
| 23659 |
+
},
|
| 23660 |
+
{
|
| 23661 |
+
"epoch": 0.2617739862101204,
|
| 23662 |
+
"grad_norm": 0.518500804901123,
|
| 23663 |
+
"learning_rate": 0.00019632613111753017,
|
| 23664 |
+
"loss": 0.2765,
|
| 23665 |
+
"step": 3360
|
| 23666 |
+
},
|
| 23667 |
+
{
|
| 23668 |
+
"epoch": 0.2618518951345877,
|
| 23669 |
+
"grad_norm": 0.7377042770385742,
|
| 23670 |
+
"learning_rate": 0.00019632393377833068,
|
| 23671 |
+
"loss": 0.5266,
|
| 23672 |
+
"step": 3361
|
| 23673 |
+
},
|
| 23674 |
+
{
|
| 23675 |
+
"epoch": 0.26192980405905497,
|
| 23676 |
+
"grad_norm": 0.6329694390296936,
|
| 23677 |
+
"learning_rate": 0.00019632175099104643,
|
| 23678 |
+
"loss": 0.406,
|
| 23679 |
+
"step": 3362
|
| 23680 |
+
},
|
| 23681 |
+
{
|
| 23682 |
+
"epoch": 0.26200771298352227,
|
| 23683 |
+
"grad_norm": 0.5361419320106506,
|
| 23684 |
+
"learning_rate": 0.00019631953909993172,
|
| 23685 |
+
"loss": 0.3258,
|
| 23686 |
+
"step": 3363
|
| 23687 |
+
},
|
| 23688 |
+
{
|
| 23689 |
+
"epoch": 0.26208562190798956,
|
| 23690 |
+
"grad_norm": 0.5869506001472473,
|
| 23691 |
+
"learning_rate": 0.00019631734176073223,
|
| 23692 |
+
"loss": 0.4462,
|
| 23693 |
+
"step": 3364
|
| 23694 |
+
},
|
| 23695 |
+
{
|
| 23696 |
+
"epoch": 0.26216353083245686,
|
| 23697 |
+
"grad_norm": 0.6154558658599854,
|
| 23698 |
+
"learning_rate": 0.00019631515897344798,
|
| 23699 |
+
"loss": 0.4462,
|
| 23700 |
+
"step": 3365
|
| 23701 |
+
},
|
| 23702 |
+
{
|
| 23703 |
+
"epoch": 0.26224143975692416,
|
| 23704 |
+
"grad_norm": 0.6136952638626099,
|
| 23705 |
+
"learning_rate": 0.0001963129616342485,
|
| 23706 |
+
"loss": 0.3538,
|
| 23707 |
+
"step": 3366
|
| 23708 |
+
},
|
| 23709 |
+
{
|
| 23710 |
+
"epoch": 0.26231934868139145,
|
| 23711 |
+
"grad_norm": 0.8131573796272278,
|
| 23712 |
+
"learning_rate": 0.000196310764295049,
|
| 23713 |
+
"loss": 0.4664,
|
| 23714 |
+
"step": 3367
|
| 23715 |
+
},
|
| 23716 |
+
{
|
| 23717 |
+
"epoch": 0.26239725760585875,
|
| 23718 |
+
"grad_norm": 0.540601909160614,
|
| 23719 |
+
"learning_rate": 0.00019630856695584953,
|
| 23720 |
+
"loss": 0.477,
|
| 23721 |
+
"step": 3368
|
| 23722 |
+
},
|
| 23723 |
+
{
|
| 23724 |
+
"epoch": 0.26247516653032604,
|
| 23725 |
+
"grad_norm": 0.5243192315101624,
|
| 23726 |
+
"learning_rate": 0.00019630636961665004,
|
| 23727 |
+
"loss": 0.3852,
|
| 23728 |
+
"step": 3369
|
| 23729 |
+
},
|
| 23730 |
+
{
|
| 23731 |
+
"epoch": 0.26255307545479334,
|
| 23732 |
+
"grad_norm": 0.6711352467536926,
|
| 23733 |
+
"learning_rate": 0.00019630415772553533,
|
| 23734 |
+
"loss": 0.3177,
|
| 23735 |
+
"step": 3370
|
| 23736 |
+
},
|
| 23737 |
+
{
|
| 23738 |
+
"epoch": 0.26263098437926063,
|
| 23739 |
+
"grad_norm": 0.4639964699745178,
|
| 23740 |
+
"learning_rate": 0.00019630197493825108,
|
| 23741 |
+
"loss": 0.3746,
|
| 23742 |
+
"step": 3371
|
| 23743 |
+
},
|
| 23744 |
+
{
|
| 23745 |
+
"epoch": 0.26270889330372793,
|
| 23746 |
+
"grad_norm": 0.5173535943031311,
|
| 23747 |
+
"learning_rate": 0.00019629976304713637,
|
| 23748 |
+
"loss": 0.3041,
|
| 23749 |
+
"step": 3372
|
| 23750 |
+
},
|
| 23751 |
+
{
|
| 23752 |
+
"epoch": 0.2627868022281952,
|
| 23753 |
+
"grad_norm": 0.6379304528236389,
|
| 23754 |
+
"learning_rate": 0.00019629756570793688,
|
| 23755 |
+
"loss": 0.298,
|
| 23756 |
+
"step": 3373
|
| 23757 |
+
},
|
| 23758 |
+
{
|
| 23759 |
+
"epoch": 0.2628647111526625,
|
| 23760 |
+
"grad_norm": 0.6384372711181641,
|
| 23761 |
+
"learning_rate": 0.0001962953683687374,
|
| 23762 |
+
"loss": 0.4803,
|
| 23763 |
+
"step": 3374
|
| 23764 |
+
},
|
| 23765 |
+
{
|
| 23766 |
+
"epoch": 0.2629426200771298,
|
| 23767 |
+
"grad_norm": 0.528658926486969,
|
| 23768 |
+
"learning_rate": 0.0001962931564776227,
|
| 23769 |
+
"loss": 0.3849,
|
| 23770 |
+
"step": 3375
|
| 23771 |
+
},
|
| 23772 |
+
{
|
| 23773 |
+
"epoch": 0.2630205290015971,
|
| 23774 |
+
"grad_norm": 0.7615652680397034,
|
| 23775 |
+
"learning_rate": 0.0001962909591384232,
|
| 23776 |
+
"loss": 0.4236,
|
| 23777 |
+
"step": 3376
|
| 23778 |
+
},
|
| 23779 |
+
{
|
| 23780 |
+
"epoch": 0.2630984379260644,
|
| 23781 |
+
"grad_norm": 0.6947478652000427,
|
| 23782 |
+
"learning_rate": 0.00019628876179922372,
|
| 23783 |
+
"loss": 0.4969,
|
| 23784 |
+
"step": 3377
|
| 23785 |
+
},
|
| 23786 |
+
{
|
| 23787 |
+
"epoch": 0.2631763468505317,
|
| 23788 |
+
"grad_norm": 0.6295408606529236,
|
| 23789 |
+
"learning_rate": 0.000196286549908109,
|
| 23790 |
+
"loss": 0.2846,
|
| 23791 |
+
"step": 3378
|
| 23792 |
+
},
|
| 23793 |
+
{
|
| 23794 |
+
"epoch": 0.263254255774999,
|
| 23795 |
+
"grad_norm": 0.6125136017799377,
|
| 23796 |
+
"learning_rate": 0.00019628435256890953,
|
| 23797 |
+
"loss": 0.3928,
|
| 23798 |
+
"step": 3379
|
| 23799 |
+
},
|
| 23800 |
+
{
|
| 23801 |
+
"epoch": 0.2633321646994663,
|
| 23802 |
+
"grad_norm": 0.5277166366577148,
|
| 23803 |
+
"learning_rate": 0.00019628214067779481,
|
| 23804 |
+
"loss": 0.4169,
|
| 23805 |
+
"step": 3380
|
| 23806 |
+
},
|
| 23807 |
+
{
|
| 23808 |
+
"epoch": 0.26341007362393365,
|
| 23809 |
+
"grad_norm": 0.5917048454284668,
|
| 23810 |
+
"learning_rate": 0.00019627994333859533,
|
| 23811 |
+
"loss": 0.2313,
|
| 23812 |
+
"step": 3381
|
| 23813 |
+
},
|
| 23814 |
+
{
|
| 23815 |
+
"epoch": 0.26348798254840095,
|
| 23816 |
+
"grad_norm": 0.6162173748016357,
|
| 23817 |
+
"learning_rate": 0.00019627773144748062,
|
| 23818 |
+
"loss": 0.2668,
|
| 23819 |
+
"step": 3382
|
| 23820 |
+
},
|
| 23821 |
+
{
|
| 23822 |
+
"epoch": 0.26356589147286824,
|
| 23823 |
+
"grad_norm": 0.5813966393470764,
|
| 23824 |
+
"learning_rate": 0.0001962755195563659,
|
| 23825 |
+
"loss": 0.2811,
|
| 23826 |
+
"step": 3383
|
| 23827 |
+
},
|
| 23828 |
+
{
|
| 23829 |
+
"epoch": 0.26364380039733554,
|
| 23830 |
+
"grad_norm": 0.5924177765846252,
|
| 23831 |
+
"learning_rate": 0.0001962733076652512,
|
| 23832 |
+
"loss": 0.2425,
|
| 23833 |
+
"step": 3384
|
| 23834 |
+
},
|
| 23835 |
+
{
|
| 23836 |
+
"epoch": 0.26372170932180283,
|
| 23837 |
+
"grad_norm": 0.6684619784355164,
|
| 23838 |
+
"learning_rate": 0.0001962711103260517,
|
| 23839 |
+
"loss": 0.3006,
|
| 23840 |
+
"step": 3385
|
| 23841 |
+
},
|
| 23842 |
+
{
|
| 23843 |
+
"epoch": 0.26379961824627013,
|
| 23844 |
+
"grad_norm": 0.8645442724227905,
|
| 23845 |
+
"learning_rate": 0.000196268898434937,
|
| 23846 |
+
"loss": 0.3502,
|
| 23847 |
+
"step": 3386
|
| 23848 |
+
},
|
| 23849 |
+
{
|
| 23850 |
+
"epoch": 0.2638775271707374,
|
| 23851 |
+
"grad_norm": 0.5467059016227722,
|
| 23852 |
+
"learning_rate": 0.0001962666865438223,
|
| 23853 |
+
"loss": 0.2284,
|
| 23854 |
+
"step": 3387
|
| 23855 |
+
},
|
| 23856 |
+
{
|
| 23857 |
+
"epoch": 0.2639554360952047,
|
| 23858 |
+
"grad_norm": 0.7654725313186646,
|
| 23859 |
+
"learning_rate": 0.00019626447465270758,
|
| 23860 |
+
"loss": 0.6057,
|
| 23861 |
+
"step": 3388
|
| 23862 |
+
},
|
| 23863 |
+
{
|
| 23864 |
+
"epoch": 0.264033345019672,
|
| 23865 |
+
"grad_norm": 0.6194819808006287,
|
| 23866 |
+
"learning_rate": 0.00019626226276159286,
|
| 23867 |
+
"loss": 0.3221,
|
| 23868 |
+
"step": 3389
|
| 23869 |
+
},
|
| 23870 |
+
{
|
| 23871 |
+
"epoch": 0.2641112539441393,
|
| 23872 |
+
"grad_norm": 0.6022887825965881,
|
| 23873 |
+
"learning_rate": 0.00019626005087047815,
|
| 23874 |
+
"loss": 0.43,
|
| 23875 |
+
"step": 3390
|
| 23876 |
+
},
|
| 23877 |
+
{
|
| 23878 |
+
"epoch": 0.2641891628686066,
|
| 23879 |
+
"grad_norm": 0.5463837385177612,
|
| 23880 |
+
"learning_rate": 0.00019625783897936344,
|
| 23881 |
+
"loss": 0.3558,
|
| 23882 |
+
"step": 3391
|
| 23883 |
+
},
|
| 23884 |
+
{
|
| 23885 |
+
"epoch": 0.2642670717930739,
|
| 23886 |
+
"grad_norm": 0.4681384563446045,
|
| 23887 |
+
"learning_rate": 0.0001962556125363335,
|
| 23888 |
+
"loss": 0.2748,
|
| 23889 |
+
"step": 3392
|
| 23890 |
+
},
|
| 23891 |
+
{
|
| 23892 |
+
"epoch": 0.2643449807175412,
|
| 23893 |
+
"grad_norm": 0.6391708850860596,
|
| 23894 |
+
"learning_rate": 0.00019625341519713402,
|
| 23895 |
+
"loss": 0.4137,
|
| 23896 |
+
"step": 3393
|
| 23897 |
+
},
|
| 23898 |
+
{
|
| 23899 |
+
"epoch": 0.2644228896420085,
|
| 23900 |
+
"grad_norm": 0.5850754380226135,
|
| 23901 |
+
"learning_rate": 0.0001962512033060193,
|
| 23902 |
+
"loss": 0.4663,
|
| 23903 |
+
"step": 3394
|
| 23904 |
+
},
|
| 23905 |
+
{
|
| 23906 |
+
"epoch": 0.2645007985664758,
|
| 23907 |
+
"grad_norm": 0.44514065980911255,
|
| 23908 |
+
"learning_rate": 0.00019624897686298937,
|
| 23909 |
+
"loss": 0.2179,
|
| 23910 |
+
"step": 3395
|
| 23911 |
+
},
|
| 23912 |
+
{
|
| 23913 |
+
"epoch": 0.2645787074909431,
|
| 23914 |
+
"grad_norm": 0.6466753482818604,
|
| 23915 |
+
"learning_rate": 0.00019624676497187465,
|
| 23916 |
+
"loss": 0.43,
|
| 23917 |
+
"step": 3396
|
| 23918 |
+
},
|
| 23919 |
+
{
|
| 23920 |
+
"epoch": 0.2646566164154104,
|
| 23921 |
+
"grad_norm": 0.6273171901702881,
|
| 23922 |
+
"learning_rate": 0.00019624453852884471,
|
| 23923 |
+
"loss": 0.3653,
|
| 23924 |
+
"step": 3397
|
| 23925 |
+
},
|
| 23926 |
+
{
|
| 23927 |
+
"epoch": 0.2647345253398777,
|
| 23928 |
+
"grad_norm": 0.5294480323791504,
|
| 23929 |
+
"learning_rate": 0.00019624232663773,
|
| 23930 |
+
"loss": 0.3981,
|
| 23931 |
+
"step": 3398
|
| 23932 |
+
},
|
| 23933 |
+
{
|
| 23934 |
+
"epoch": 0.264812434264345,
|
| 23935 |
+
"grad_norm": 0.6815981864929199,
|
| 23936 |
+
"learning_rate": 0.0001962401147466153,
|
| 23937 |
+
"loss": 0.5037,
|
| 23938 |
+
"step": 3399
|
| 23939 |
+
},
|
| 23940 |
+
{
|
| 23941 |
+
"epoch": 0.26489034318881227,
|
| 23942 |
+
"grad_norm": 0.6488057971000671,
|
| 23943 |
+
"learning_rate": 0.00019623788830358535,
|
| 23944 |
+
"loss": 0.3814,
|
| 23945 |
+
"step": 3400
|
| 23946 |
+
},
|
| 23947 |
+
{
|
| 23948 |
+
"epoch": 0.26489034318881227,
|
| 23949 |
+
"eval_loss": 0.3508091866970062,
|
| 23950 |
+
"eval_runtime": 96.7602,
|
| 23951 |
+
"eval_samples_per_second": 5.333,
|
| 23952 |
+
"eval_steps_per_second": 2.666,
|
| 23953 |
+
"step": 3400
|
     }
   ],
   "logging_steps": 1,

@@ -22557,7 +23965,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter":
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {

@@ -22566,12 +23974,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.
+  "total_flos": 5.872940911385641e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null