Fanucci
commited on
Training in progress, step 1600, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 671149168
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ecb4977766b5ec564c832820d337f28943d6ee0a8e063ebb6f30dd60257300f
|
| 3 |
size 671149168
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1342555602
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:187087330c9ab915ab8e333f763a087fdd1d714f2c3f60bf422f2ebd54456325
|
| 3 |
size 1342555602
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1506806fb02da53f329e6caf8828bb52767a1586a3df56ca79e2b42e438f89ce
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62774dbefac2540dfe5b09c1c73847886a74832f098a072954309e3f6dfaa4eb
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 3.1351470947265625,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-600",
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 200,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -9871,6 +9871,1414 @@
|
|
| 9871 |
"eval_samples_per_second": 5.636,
|
| 9872 |
"eval_steps_per_second": 1.409,
|
| 9873 |
"step": 1400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9874 |
}
|
| 9875 |
],
|
| 9876 |
"logging_steps": 1,
|
|
@@ -9885,7 +11293,7 @@
|
|
| 9885 |
"early_stopping_threshold": 0.0
|
| 9886 |
},
|
| 9887 |
"attributes": {
|
| 9888 |
-
"early_stopping_patience_counter":
|
| 9889 |
}
|
| 9890 |
},
|
| 9891 |
"TrainerControl": {
|
|
@@ -9899,7 +11307,7 @@
|
|
| 9899 |
"attributes": {}
|
| 9900 |
}
|
| 9901 |
},
|
| 9902 |
-
"total_flos": 1.
|
| 9903 |
"train_batch_size": 4,
|
| 9904 |
"trial_name": null,
|
| 9905 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 3.1351470947265625,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-600",
|
| 4 |
+
"epoch": 0.5119180931051032,
|
| 5 |
"eval_steps": 200,
|
| 6 |
+
"global_step": 1600,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 9871 |
"eval_samples_per_second": 5.636,
|
| 9872 |
"eval_steps_per_second": 1.409,
|
| 9873 |
"step": 1400
|
| 9874 |
+
},
|
| 9875 |
+
{
|
| 9876 |
+
"epoch": 0.44824828027515595,
|
| 9877 |
+
"grad_norm": 12.275089263916016,
|
| 9878 |
+
"learning_rate": 0.00023614138687126887,
|
| 9879 |
+
"loss": 6.384,
|
| 9880 |
+
"step": 1401
|
| 9881 |
+
},
|
| 9882 |
+
{
|
| 9883 |
+
"epoch": 0.44856822908334665,
|
| 9884 |
+
"grad_norm": 14.296133995056152,
|
| 9885 |
+
"learning_rate": 0.00023594752846909414,
|
| 9886 |
+
"loss": 6.2824,
|
| 9887 |
+
"step": 1402
|
| 9888 |
+
},
|
| 9889 |
+
{
|
| 9890 |
+
"epoch": 0.44888817789153734,
|
| 9891 |
+
"grad_norm": 23.578289031982422,
|
| 9892 |
+
"learning_rate": 0.0002357536425375651,
|
| 9893 |
+
"loss": 6.1043,
|
| 9894 |
+
"step": 1403
|
| 9895 |
+
},
|
| 9896 |
+
{
|
| 9897 |
+
"epoch": 0.44920812669972804,
|
| 9898 |
+
"grad_norm": 17.572853088378906,
|
| 9899 |
+
"learning_rate": 0.00023555972928238737,
|
| 9900 |
+
"loss": 6.3525,
|
| 9901 |
+
"step": 1404
|
| 9902 |
+
},
|
| 9903 |
+
{
|
| 9904 |
+
"epoch": 0.44952807550791873,
|
| 9905 |
+
"grad_norm": 13.461702346801758,
|
| 9906 |
+
"learning_rate": 0.00023536578890929582,
|
| 9907 |
+
"loss": 6.3522,
|
| 9908 |
+
"step": 1405
|
| 9909 |
+
},
|
| 9910 |
+
{
|
| 9911 |
+
"epoch": 0.44984802431610943,
|
| 9912 |
+
"grad_norm": 14.324708938598633,
|
| 9913 |
+
"learning_rate": 0.00023517182162405368,
|
| 9914 |
+
"loss": 6.2278,
|
| 9915 |
+
"step": 1406
|
| 9916 |
+
},
|
| 9917 |
+
{
|
| 9918 |
+
"epoch": 0.4501679731243001,
|
| 9919 |
+
"grad_norm": 17.58342933654785,
|
| 9920 |
+
"learning_rate": 0.0002349778276324532,
|
| 9921 |
+
"loss": 6.4268,
|
| 9922 |
+
"step": 1407
|
| 9923 |
+
},
|
| 9924 |
+
{
|
| 9925 |
+
"epoch": 0.4504879219324908,
|
| 9926 |
+
"grad_norm": 13.684356689453125,
|
| 9927 |
+
"learning_rate": 0.0002347838071403146,
|
| 9928 |
+
"loss": 6.353,
|
| 9929 |
+
"step": 1408
|
| 9930 |
+
},
|
| 9931 |
+
{
|
| 9932 |
+
"epoch": 0.4508078707406815,
|
| 9933 |
+
"grad_norm": 13.513612747192383,
|
| 9934 |
+
"learning_rate": 0.0002345897603534862,
|
| 9935 |
+
"loss": 6.361,
|
| 9936 |
+
"step": 1409
|
| 9937 |
+
},
|
| 9938 |
+
{
|
| 9939 |
+
"epoch": 0.45112781954887216,
|
| 9940 |
+
"grad_norm": 12.80638313293457,
|
| 9941 |
+
"learning_rate": 0.0002343956874778447,
|
| 9942 |
+
"loss": 6.3029,
|
| 9943 |
+
"step": 1410
|
| 9944 |
+
},
|
| 9945 |
+
{
|
| 9946 |
+
"epoch": 0.45144776835706285,
|
| 9947 |
+
"grad_norm": 16.215721130371094,
|
| 9948 |
+
"learning_rate": 0.00023420158871929393,
|
| 9949 |
+
"loss": 6.3764,
|
| 9950 |
+
"step": 1411
|
| 9951 |
+
},
|
| 9952 |
+
{
|
| 9953 |
+
"epoch": 0.45176771716525355,
|
| 9954 |
+
"grad_norm": 25.084028244018555,
|
| 9955 |
+
"learning_rate": 0.00023400746428376538,
|
| 9956 |
+
"loss": 6.2,
|
| 9957 |
+
"step": 1412
|
| 9958 |
+
},
|
| 9959 |
+
{
|
| 9960 |
+
"epoch": 0.45208766597344424,
|
| 9961 |
+
"grad_norm": 12.46812915802002,
|
| 9962 |
+
"learning_rate": 0.00023381331437721784,
|
| 9963 |
+
"loss": 6.2141,
|
| 9964 |
+
"step": 1413
|
| 9965 |
+
},
|
| 9966 |
+
{
|
| 9967 |
+
"epoch": 0.45240761478163494,
|
| 9968 |
+
"grad_norm": 18.606529235839844,
|
| 9969 |
+
"learning_rate": 0.00023361913920563705,
|
| 9970 |
+
"loss": 6.2929,
|
| 9971 |
+
"step": 1414
|
| 9972 |
+
},
|
| 9973 |
+
{
|
| 9974 |
+
"epoch": 0.45272756358982563,
|
| 9975 |
+
"grad_norm": 12.622028350830078,
|
| 9976 |
+
"learning_rate": 0.00023342493897503567,
|
| 9977 |
+
"loss": 6.2309,
|
| 9978 |
+
"step": 1415
|
| 9979 |
+
},
|
| 9980 |
+
{
|
| 9981 |
+
"epoch": 0.4530475123980163,
|
| 9982 |
+
"grad_norm": 30.32240867614746,
|
| 9983 |
+
"learning_rate": 0.00023323071389145278,
|
| 9984 |
+
"loss": 6.3812,
|
| 9985 |
+
"step": 1416
|
| 9986 |
+
},
|
| 9987 |
+
{
|
| 9988 |
+
"epoch": 0.453367461206207,
|
| 9989 |
+
"grad_norm": 35.68700408935547,
|
| 9990 |
+
"learning_rate": 0.00023303646416095384,
|
| 9991 |
+
"loss": 6.3141,
|
| 9992 |
+
"step": 1417
|
| 9993 |
+
},
|
| 9994 |
+
{
|
| 9995 |
+
"epoch": 0.4536874100143977,
|
| 9996 |
+
"grad_norm": 25.01828956604004,
|
| 9997 |
+
"learning_rate": 0.0002328421899896307,
|
| 9998 |
+
"loss": 6.363,
|
| 9999 |
+
"step": 1418
|
| 10000 |
+
},
|
| 10001 |
+
{
|
| 10002 |
+
"epoch": 0.45400735882258836,
|
| 10003 |
+
"grad_norm": 16.23634910583496,
|
| 10004 |
+
"learning_rate": 0.00023264789158360086,
|
| 10005 |
+
"loss": 6.3071,
|
| 10006 |
+
"step": 1419
|
| 10007 |
+
},
|
| 10008 |
+
{
|
| 10009 |
+
"epoch": 0.45432730763077905,
|
| 10010 |
+
"grad_norm": 15.429924011230469,
|
| 10011 |
+
"learning_rate": 0.0002324535691490076,
|
| 10012 |
+
"loss": 6.3,
|
| 10013 |
+
"step": 1420
|
| 10014 |
+
},
|
| 10015 |
+
{
|
| 10016 |
+
"epoch": 0.45464725643896975,
|
| 10017 |
+
"grad_norm": 30.830224990844727,
|
| 10018 |
+
"learning_rate": 0.00023225922289201988,
|
| 10019 |
+
"loss": 6.2047,
|
| 10020 |
+
"step": 1421
|
| 10021 |
+
},
|
| 10022 |
+
{
|
| 10023 |
+
"epoch": 0.45496720524716044,
|
| 10024 |
+
"grad_norm": 14.196430206298828,
|
| 10025 |
+
"learning_rate": 0.00023206485301883163,
|
| 10026 |
+
"loss": 6.3659,
|
| 10027 |
+
"step": 1422
|
| 10028 |
+
},
|
| 10029 |
+
{
|
| 10030 |
+
"epoch": 0.45528715405535114,
|
| 10031 |
+
"grad_norm": 16.008033752441406,
|
| 10032 |
+
"learning_rate": 0.0002318704597356621,
|
| 10033 |
+
"loss": 6.3481,
|
| 10034 |
+
"step": 1423
|
| 10035 |
+
},
|
| 10036 |
+
{
|
| 10037 |
+
"epoch": 0.45560710286354184,
|
| 10038 |
+
"grad_norm": 11.479830741882324,
|
| 10039 |
+
"learning_rate": 0.00023167604324875516,
|
| 10040 |
+
"loss": 6.3219,
|
| 10041 |
+
"step": 1424
|
| 10042 |
+
},
|
| 10043 |
+
{
|
| 10044 |
+
"epoch": 0.45592705167173253,
|
| 10045 |
+
"grad_norm": 17.422664642333984,
|
| 10046 |
+
"learning_rate": 0.00023148160376437957,
|
| 10047 |
+
"loss": 6.3007,
|
| 10048 |
+
"step": 1425
|
| 10049 |
+
},
|
| 10050 |
+
{
|
| 10051 |
+
"epoch": 0.4562470004799232,
|
| 10052 |
+
"grad_norm": 10.73954963684082,
|
| 10053 |
+
"learning_rate": 0.00023128714148882825,
|
| 10054 |
+
"loss": 6.3275,
|
| 10055 |
+
"step": 1426
|
| 10056 |
+
},
|
| 10057 |
+
{
|
| 10058 |
+
"epoch": 0.4565669492881139,
|
| 10059 |
+
"grad_norm": 9.496156692504883,
|
| 10060 |
+
"learning_rate": 0.0002310926566284183,
|
| 10061 |
+
"loss": 6.179,
|
| 10062 |
+
"step": 1427
|
| 10063 |
+
},
|
| 10064 |
+
{
|
| 10065 |
+
"epoch": 0.4568868980963046,
|
| 10066 |
+
"grad_norm": 13.453152656555176,
|
| 10067 |
+
"learning_rate": 0.00023089814938949098,
|
| 10068 |
+
"loss": 6.0879,
|
| 10069 |
+
"step": 1428
|
| 10070 |
+
},
|
| 10071 |
+
{
|
| 10072 |
+
"epoch": 0.45720684690449526,
|
| 10073 |
+
"grad_norm": 10.89134693145752,
|
| 10074 |
+
"learning_rate": 0.00023070361997841107,
|
| 10075 |
+
"loss": 6.3478,
|
| 10076 |
+
"step": 1429
|
| 10077 |
+
},
|
| 10078 |
+
{
|
| 10079 |
+
"epoch": 0.45752679571268595,
|
| 10080 |
+
"grad_norm": 10.230031967163086,
|
| 10081 |
+
"learning_rate": 0.00023050906860156708,
|
| 10082 |
+
"loss": 6.0896,
|
| 10083 |
+
"step": 1430
|
| 10084 |
+
},
|
| 10085 |
+
{
|
| 10086 |
+
"epoch": 0.45784674452087665,
|
| 10087 |
+
"grad_norm": 16.458038330078125,
|
| 10088 |
+
"learning_rate": 0.00023031449546537065,
|
| 10089 |
+
"loss": 6.5301,
|
| 10090 |
+
"step": 1431
|
| 10091 |
+
},
|
| 10092 |
+
{
|
| 10093 |
+
"epoch": 0.45816669332906734,
|
| 10094 |
+
"grad_norm": 13.3131103515625,
|
| 10095 |
+
"learning_rate": 0.00023011990077625663,
|
| 10096 |
+
"loss": 6.2881,
|
| 10097 |
+
"step": 1432
|
| 10098 |
+
},
|
| 10099 |
+
{
|
| 10100 |
+
"epoch": 0.45848664213725804,
|
| 10101 |
+
"grad_norm": 9.168004989624023,
|
| 10102 |
+
"learning_rate": 0.00022992528474068266,
|
| 10103 |
+
"loss": 6.3154,
|
| 10104 |
+
"step": 1433
|
| 10105 |
+
},
|
| 10106 |
+
{
|
| 10107 |
+
"epoch": 0.45880659094544873,
|
| 10108 |
+
"grad_norm": 35.18294143676758,
|
| 10109 |
+
"learning_rate": 0.00022973064756512903,
|
| 10110 |
+
"loss": 6.2735,
|
| 10111 |
+
"step": 1434
|
| 10112 |
+
},
|
| 10113 |
+
{
|
| 10114 |
+
"epoch": 0.45912653975363943,
|
| 10115 |
+
"grad_norm": 11.018784523010254,
|
| 10116 |
+
"learning_rate": 0.0002295359894560985,
|
| 10117 |
+
"loss": 6.4403,
|
| 10118 |
+
"step": 1435
|
| 10119 |
+
},
|
| 10120 |
+
{
|
| 10121 |
+
"epoch": 0.4594464885618301,
|
| 10122 |
+
"grad_norm": 8.920594215393066,
|
| 10123 |
+
"learning_rate": 0.00022934131062011607,
|
| 10124 |
+
"loss": 6.0729,
|
| 10125 |
+
"step": 1436
|
| 10126 |
+
},
|
| 10127 |
+
{
|
| 10128 |
+
"epoch": 0.4597664373700208,
|
| 10129 |
+
"grad_norm": 13.392729759216309,
|
| 10130 |
+
"learning_rate": 0.00022914661126372855,
|
| 10131 |
+
"loss": 6.2305,
|
| 10132 |
+
"step": 1437
|
| 10133 |
+
},
|
| 10134 |
+
{
|
| 10135 |
+
"epoch": 0.46008638617821146,
|
| 10136 |
+
"grad_norm": 12.815661430358887,
|
| 10137 |
+
"learning_rate": 0.00022895189159350486,
|
| 10138 |
+
"loss": 6.2008,
|
| 10139 |
+
"step": 1438
|
| 10140 |
+
},
|
| 10141 |
+
{
|
| 10142 |
+
"epoch": 0.46040633498640215,
|
| 10143 |
+
"grad_norm": 16.973766326904297,
|
| 10144 |
+
"learning_rate": 0.00022875715181603506,
|
| 10145 |
+
"loss": 6.1381,
|
| 10146 |
+
"step": 1439
|
| 10147 |
+
},
|
| 10148 |
+
{
|
| 10149 |
+
"epoch": 0.46072628379459285,
|
| 10150 |
+
"grad_norm": 13.373908996582031,
|
| 10151 |
+
"learning_rate": 0.00022856239213793088,
|
| 10152 |
+
"loss": 6.3298,
|
| 10153 |
+
"step": 1440
|
| 10154 |
+
},
|
| 10155 |
+
{
|
| 10156 |
+
"epoch": 0.46104623260278355,
|
| 10157 |
+
"grad_norm": 8.596816062927246,
|
| 10158 |
+
"learning_rate": 0.00022836761276582497,
|
| 10159 |
+
"loss": 6.3952,
|
| 10160 |
+
"step": 1441
|
| 10161 |
+
},
|
| 10162 |
+
{
|
| 10163 |
+
"epoch": 0.46136618141097424,
|
| 10164 |
+
"grad_norm": 57.45488739013672,
|
| 10165 |
+
"learning_rate": 0.00022817281390637089,
|
| 10166 |
+
"loss": 6.3979,
|
| 10167 |
+
"step": 1442
|
| 10168 |
+
},
|
| 10169 |
+
{
|
| 10170 |
+
"epoch": 0.46168613021916494,
|
| 10171 |
+
"grad_norm": 22.90778160095215,
|
| 10172 |
+
"learning_rate": 0.00022797799576624304,
|
| 10173 |
+
"loss": 6.2665,
|
| 10174 |
+
"step": 1443
|
| 10175 |
+
},
|
| 10176 |
+
{
|
| 10177 |
+
"epoch": 0.46200607902735563,
|
| 10178 |
+
"grad_norm": 17.244796752929688,
|
| 10179 |
+
"learning_rate": 0.0002277831585521361,
|
| 10180 |
+
"loss": 6.3962,
|
| 10181 |
+
"step": 1444
|
| 10182 |
+
},
|
| 10183 |
+
{
|
| 10184 |
+
"epoch": 0.4623260278355463,
|
| 10185 |
+
"grad_norm": 10.941102027893066,
|
| 10186 |
+
"learning_rate": 0.00022758830247076505,
|
| 10187 |
+
"loss": 6.1673,
|
| 10188 |
+
"step": 1445
|
| 10189 |
+
},
|
| 10190 |
+
{
|
| 10191 |
+
"epoch": 0.462645976643737,
|
| 10192 |
+
"grad_norm": 15.169724464416504,
|
| 10193 |
+
"learning_rate": 0.00022739342772886488,
|
| 10194 |
+
"loss": 6.5136,
|
| 10195 |
+
"step": 1446
|
| 10196 |
+
},
|
| 10197 |
+
{
|
| 10198 |
+
"epoch": 0.4629659254519277,
|
| 10199 |
+
"grad_norm": 9.183232307434082,
|
| 10200 |
+
"learning_rate": 0.0002271985345331904,
|
| 10201 |
+
"loss": 6.1549,
|
| 10202 |
+
"step": 1447
|
| 10203 |
+
},
|
| 10204 |
+
{
|
| 10205 |
+
"epoch": 0.46328587426011836,
|
| 10206 |
+
"grad_norm": 10.990023612976074,
|
| 10207 |
+
"learning_rate": 0.00022700362309051593,
|
| 10208 |
+
"loss": 6.2485,
|
| 10209 |
+
"step": 1448
|
| 10210 |
+
},
|
| 10211 |
+
{
|
| 10212 |
+
"epoch": 0.46360582306830905,
|
| 10213 |
+
"grad_norm": 10.13391399383545,
|
| 10214 |
+
"learning_rate": 0.00022680869360763528,
|
| 10215 |
+
"loss": 6.3059,
|
| 10216 |
+
"step": 1449
|
| 10217 |
+
},
|
| 10218 |
+
{
|
| 10219 |
+
"epoch": 0.46392577187649975,
|
| 10220 |
+
"grad_norm": 18.721927642822266,
|
| 10221 |
+
"learning_rate": 0.00022661374629136125,
|
| 10222 |
+
"loss": 6.3244,
|
| 10223 |
+
"step": 1450
|
| 10224 |
+
},
|
| 10225 |
+
{
|
| 10226 |
+
"epoch": 0.46424572068469044,
|
| 10227 |
+
"grad_norm": 7.544312000274658,
|
| 10228 |
+
"learning_rate": 0.00022641878134852558,
|
| 10229 |
+
"loss": 6.2333,
|
| 10230 |
+
"step": 1451
|
| 10231 |
+
},
|
| 10232 |
+
{
|
| 10233 |
+
"epoch": 0.46456566949288114,
|
| 10234 |
+
"grad_norm": 11.716202735900879,
|
| 10235 |
+
"learning_rate": 0.00022622379898597897,
|
| 10236 |
+
"loss": 6.0736,
|
| 10237 |
+
"step": 1452
|
| 10238 |
+
},
|
| 10239 |
+
{
|
| 10240 |
+
"epoch": 0.46488561830107183,
|
| 10241 |
+
"grad_norm": 8.775861740112305,
|
| 10242 |
+
"learning_rate": 0.00022602879941059013,
|
| 10243 |
+
"loss": 6.2285,
|
| 10244 |
+
"step": 1453
|
| 10245 |
+
},
|
| 10246 |
+
{
|
| 10247 |
+
"epoch": 0.46520556710926253,
|
| 10248 |
+
"grad_norm": 8.8626708984375,
|
| 10249 |
+
"learning_rate": 0.0002258337828292464,
|
| 10250 |
+
"loss": 6.3011,
|
| 10251 |
+
"step": 1454
|
| 10252 |
+
},
|
| 10253 |
+
{
|
| 10254 |
+
"epoch": 0.4655255159174532,
|
| 10255 |
+
"grad_norm": 15.481707572937012,
|
| 10256 |
+
"learning_rate": 0.00022563874944885317,
|
| 10257 |
+
"loss": 6.1798,
|
| 10258 |
+
"step": 1455
|
| 10259 |
+
},
|
| 10260 |
+
{
|
| 10261 |
+
"epoch": 0.4658454647256439,
|
| 10262 |
+
"grad_norm": 12.100132942199707,
|
| 10263 |
+
"learning_rate": 0.0002254436994763334,
|
| 10264 |
+
"loss": 6.1638,
|
| 10265 |
+
"step": 1456
|
| 10266 |
+
},
|
| 10267 |
+
{
|
| 10268 |
+
"epoch": 0.46616541353383456,
|
| 10269 |
+
"grad_norm": 10.16502857208252,
|
| 10270 |
+
"learning_rate": 0.00022524863311862783,
|
| 10271 |
+
"loss": 6.4787,
|
| 10272 |
+
"step": 1457
|
| 10273 |
+
},
|
| 10274 |
+
{
|
| 10275 |
+
"epoch": 0.46648536234202526,
|
| 10276 |
+
"grad_norm": 10.59926986694336,
|
| 10277 |
+
"learning_rate": 0.00022505355058269455,
|
| 10278 |
+
"loss": 6.2177,
|
| 10279 |
+
"step": 1458
|
| 10280 |
+
},
|
| 10281 |
+
{
|
| 10282 |
+
"epoch": 0.46680531115021595,
|
| 10283 |
+
"grad_norm": 10.0380220413208,
|
| 10284 |
+
"learning_rate": 0.00022485845207550882,
|
| 10285 |
+
"loss": 6.2375,
|
| 10286 |
+
"step": 1459
|
| 10287 |
+
},
|
| 10288 |
+
{
|
| 10289 |
+
"epoch": 0.46712525995840665,
|
| 10290 |
+
"grad_norm": 10.464765548706055,
|
| 10291 |
+
"learning_rate": 0.00022466333780406283,
|
| 10292 |
+
"loss": 6.1937,
|
| 10293 |
+
"step": 1460
|
| 10294 |
+
},
|
| 10295 |
+
{
|
| 10296 |
+
"epoch": 0.46744520876659734,
|
| 10297 |
+
"grad_norm": 10.147469520568848,
|
| 10298 |
+
"learning_rate": 0.00022446820797536555,
|
| 10299 |
+
"loss": 6.36,
|
| 10300 |
+
"step": 1461
|
| 10301 |
+
},
|
| 10302 |
+
{
|
| 10303 |
+
"epoch": 0.46776515757478804,
|
| 10304 |
+
"grad_norm": 10.764010429382324,
|
| 10305 |
+
"learning_rate": 0.00022427306279644232,
|
| 10306 |
+
"loss": 6.3815,
|
| 10307 |
+
"step": 1462
|
| 10308 |
+
},
|
| 10309 |
+
{
|
| 10310 |
+
"epoch": 0.46808510638297873,
|
| 10311 |
+
"grad_norm": 71.32337188720703,
|
| 10312 |
+
"learning_rate": 0.00022407790247433492,
|
| 10313 |
+
"loss": 6.2424,
|
| 10314 |
+
"step": 1463
|
| 10315 |
+
},
|
| 10316 |
+
{
|
| 10317 |
+
"epoch": 0.46840505519116943,
|
| 10318 |
+
"grad_norm": 11.261139869689941,
|
| 10319 |
+
"learning_rate": 0.0002238827272161011,
|
| 10320 |
+
"loss": 6.441,
|
| 10321 |
+
"step": 1464
|
| 10322 |
+
},
|
| 10323 |
+
{
|
| 10324 |
+
"epoch": 0.4687250039993601,
|
| 10325 |
+
"grad_norm": 10.223528861999512,
|
| 10326 |
+
"learning_rate": 0.00022368753722881444,
|
| 10327 |
+
"loss": 6.3204,
|
| 10328 |
+
"step": 1465
|
| 10329 |
+
},
|
| 10330 |
+
{
|
| 10331 |
+
"epoch": 0.4690449528075508,
|
| 10332 |
+
"grad_norm": 9.939703941345215,
|
| 10333 |
+
"learning_rate": 0.00022349233271956438,
|
| 10334 |
+
"loss": 6.2861,
|
| 10335 |
+
"step": 1466
|
| 10336 |
+
},
|
| 10337 |
+
{
|
| 10338 |
+
"epoch": 0.46936490161574146,
|
| 10339 |
+
"grad_norm": 11.785332679748535,
|
| 10340 |
+
"learning_rate": 0.00022329711389545528,
|
| 10341 |
+
"loss": 6.128,
|
| 10342 |
+
"step": 1467
|
| 10343 |
+
},
|
| 10344 |
+
{
|
| 10345 |
+
"epoch": 0.46968485042393215,
|
| 10346 |
+
"grad_norm": 6.066036224365234,
|
| 10347 |
+
"learning_rate": 0.00022310188096360726,
|
| 10348 |
+
"loss": 6.2245,
|
| 10349 |
+
"step": 1468
|
| 10350 |
+
},
|
| 10351 |
+
{
|
| 10352 |
+
"epoch": 0.47000479923212285,
|
| 10353 |
+
"grad_norm": 12.680907249450684,
|
| 10354 |
+
"learning_rate": 0.00022290663413115507,
|
| 10355 |
+
"loss": 6.3803,
|
| 10356 |
+
"step": 1469
|
| 10357 |
+
},
|
| 10358 |
+
{
|
| 10359 |
+
"epoch": 0.47032474804031354,
|
| 10360 |
+
"grad_norm": 9.246479034423828,
|
| 10361 |
+
"learning_rate": 0.00022271137360524822,
|
| 10362 |
+
"loss": 6.426,
|
| 10363 |
+
"step": 1470
|
| 10364 |
+
},
|
| 10365 |
+
{
|
| 10366 |
+
"epoch": 0.47064469684850424,
|
| 10367 |
+
"grad_norm": 12.644569396972656,
|
| 10368 |
+
"learning_rate": 0.00022251609959305082,
|
| 10369 |
+
"loss": 6.344,
|
| 10370 |
+
"step": 1471
|
| 10371 |
+
},
|
| 10372 |
+
{
|
| 10373 |
+
"epoch": 0.47096464565669494,
|
| 10374 |
+
"grad_norm": 10.939471244812012,
|
| 10375 |
+
"learning_rate": 0.00022232081230174125,
|
| 10376 |
+
"loss": 6.0337,
|
| 10377 |
+
"step": 1472
|
| 10378 |
+
},
|
| 10379 |
+
{
|
| 10380 |
+
"epoch": 0.47128459446488563,
|
| 10381 |
+
"grad_norm": 7.102919101715088,
|
| 10382 |
+
"learning_rate": 0.00022212551193851203,
|
| 10383 |
+
"loss": 6.3417,
|
| 10384 |
+
"step": 1473
|
| 10385 |
+
},
|
| 10386 |
+
{
|
| 10387 |
+
"epoch": 0.4716045432730763,
|
| 10388 |
+
"grad_norm": 13.819934844970703,
|
| 10389 |
+
"learning_rate": 0.00022193019871056958,
|
| 10390 |
+
"loss": 6.1443,
|
| 10391 |
+
"step": 1474
|
| 10392 |
+
},
|
| 10393 |
+
{
|
| 10394 |
+
"epoch": 0.471924492081267,
|
| 10395 |
+
"grad_norm": 7.774622917175293,
|
| 10396 |
+
"learning_rate": 0.0002217348728251338,
|
| 10397 |
+
"loss": 6.2144,
|
| 10398 |
+
"step": 1475
|
| 10399 |
+
},
|
| 10400 |
+
{
|
| 10401 |
+
"epoch": 0.47224444088945766,
|
| 10402 |
+
"grad_norm": 12.074188232421875,
|
| 10403 |
+
"learning_rate": 0.00022153953448943815,
|
| 10404 |
+
"loss": 6.1406,
|
| 10405 |
+
"step": 1476
|
| 10406 |
+
},
|
| 10407 |
+
{
|
| 10408 |
+
"epoch": 0.47256438969764836,
|
| 10409 |
+
"grad_norm": 8.314803123474121,
|
| 10410 |
+
"learning_rate": 0.00022134418391072937,
|
| 10411 |
+
"loss": 6.1434,
|
| 10412 |
+
"step": 1477
|
| 10413 |
+
},
|
| 10414 |
+
{
|
| 10415 |
+
"epoch": 0.47288433850583905,
|
| 10416 |
+
"grad_norm": 11.96036148071289,
|
| 10417 |
+
"learning_rate": 0.00022114882129626695,
|
| 10418 |
+
"loss": 6.2643,
|
| 10419 |
+
"step": 1478
|
| 10420 |
+
},
|
| 10421 |
+
{
|
| 10422 |
+
"epoch": 0.47320428731402975,
|
| 10423 |
+
"grad_norm": 15.879840850830078,
|
| 10424 |
+
"learning_rate": 0.00022095344685332338,
|
| 10425 |
+
"loss": 6.2671,
|
| 10426 |
+
"step": 1479
|
| 10427 |
+
},
|
| 10428 |
+
{
|
| 10429 |
+
"epoch": 0.47352423612222044,
|
| 10430 |
+
"grad_norm": 13.139908790588379,
|
| 10431 |
+
"learning_rate": 0.00022075806078918363,
|
| 10432 |
+
"loss": 6.0182,
|
| 10433 |
+
"step": 1480
|
| 10434 |
+
},
|
| 10435 |
+
{
|
| 10436 |
+
"epoch": 0.47384418493041114,
|
| 10437 |
+
"grad_norm": 13.335620880126953,
|
| 10438 |
+
"learning_rate": 0.00022056266331114494,
|
| 10439 |
+
"loss": 6.137,
|
| 10440 |
+
"step": 1481
|
| 10441 |
+
},
|
| 10442 |
+
{
|
| 10443 |
+
"epoch": 0.47416413373860183,
|
| 10444 |
+
"grad_norm": 26.10761260986328,
|
| 10445 |
+
"learning_rate": 0.00022036725462651672,
|
| 10446 |
+
"loss": 6.2739,
|
| 10447 |
+
"step": 1482
|
| 10448 |
+
},
|
| 10449 |
+
{
|
| 10450 |
+
"epoch": 0.47448408254679253,
|
| 10451 |
+
"grad_norm": 141.016845703125,
|
| 10452 |
+
"learning_rate": 0.00022017183494262027,
|
| 10453 |
+
"loss": 6.2765,
|
| 10454 |
+
"step": 1483
|
| 10455 |
+
},
|
| 10456 |
+
{
|
| 10457 |
+
"epoch": 0.4748040313549832,
|
| 10458 |
+
"grad_norm": 30.745948791503906,
|
| 10459 |
+
"learning_rate": 0.00021997640446678852,
|
| 10460 |
+
"loss": 6.3736,
|
| 10461 |
+
"step": 1484
|
| 10462 |
+
},
|
| 10463 |
+
{
|
| 10464 |
+
"epoch": 0.47512398016317386,
|
| 10465 |
+
"grad_norm": 25.929046630859375,
|
| 10466 |
+
"learning_rate": 0.00021978096340636585,
|
| 10467 |
+
"loss": 6.1721,
|
| 10468 |
+
"step": 1485
|
| 10469 |
+
},
|
| 10470 |
+
{
|
| 10471 |
+
"epoch": 0.47544392897136456,
|
| 10472 |
+
"grad_norm": 748.2965087890625,
|
| 10473 |
+
"learning_rate": 0.00021958551196870797,
|
| 10474 |
+
"loss": 6.4215,
|
| 10475 |
+
"step": 1486
|
| 10476 |
+
},
|
| 10477 |
+
{
|
| 10478 |
+
"epoch": 0.47576387777955526,
|
| 10479 |
+
"grad_norm": 4598.34619140625,
|
| 10480 |
+
"learning_rate": 0.00021939005036118142,
|
| 10481 |
+
"loss": 6.2953,
|
| 10482 |
+
"step": 1487
|
| 10483 |
+
},
|
| 10484 |
+
{
|
| 10485 |
+
"epoch": 0.47608382658774595,
|
| 10486 |
+
"grad_norm": 4421.1689453125,
|
| 10487 |
+
"learning_rate": 0.0002191945787911638,
|
| 10488 |
+
"loss": 6.3312,
|
| 10489 |
+
"step": 1488
|
| 10490 |
+
},
|
| 10491 |
+
{
|
| 10492 |
+
"epoch": 0.47640377539593665,
|
| 10493 |
+
"grad_norm": 47090.1484375,
|
| 10494 |
+
"learning_rate": 0.00021899909746604294,
|
| 10495 |
+
"loss": 6.2649,
|
| 10496 |
+
"step": 1489
|
| 10497 |
+
},
|
| 10498 |
+
{
|
| 10499 |
+
"epoch": 0.47672372420412734,
|
| 10500 |
+
"grad_norm": 3408.970703125,
|
| 10501 |
+
"learning_rate": 0.00021880360659321725,
|
| 10502 |
+
"loss": 6.8902,
|
| 10503 |
+
"step": 1490
|
| 10504 |
+
},
|
| 10505 |
+
{
|
| 10506 |
+
"epoch": 0.47704367301231804,
|
| 10507 |
+
"grad_norm": 60290.5234375,
|
| 10508 |
+
"learning_rate": 0.0002186081063800953,
|
| 10509 |
+
"loss": 11.7343,
|
| 10510 |
+
"step": 1491
|
| 10511 |
+
},
|
| 10512 |
+
{
|
| 10513 |
+
"epoch": 0.47736362182050873,
|
| 10514 |
+
"grad_norm": 331439.40625,
|
| 10515 |
+
"learning_rate": 0.00021841259703409528,
|
| 10516 |
+
"loss": 19.0283,
|
| 10517 |
+
"step": 1492
|
| 10518 |
+
},
|
| 10519 |
+
{
|
| 10520 |
+
"epoch": 0.4776835706286994,
|
| 10521 |
+
"grad_norm": 360846.0,
|
| 10522 |
+
"learning_rate": 0.00021821707876264545,
|
| 10523 |
+
"loss": 20.4589,
|
| 10524 |
+
"step": 1493
|
| 10525 |
+
},
|
| 10526 |
+
{
|
| 10527 |
+
"epoch": 0.4780035194368901,
|
| 10528 |
+
"grad_norm": 7549.9189453125,
|
| 10529 |
+
"learning_rate": 0.00021802155177318334,
|
| 10530 |
+
"loss": 23.1843,
|
| 10531 |
+
"step": 1494
|
| 10532 |
+
},
|
| 10533 |
+
{
|
| 10534 |
+
"epoch": 0.47832346824508076,
|
| 10535 |
+
"grad_norm": 4522283.0,
|
| 10536 |
+
"learning_rate": 0.00021782601627315566,
|
| 10537 |
+
"loss": 23.6448,
|
| 10538 |
+
"step": 1495
|
| 10539 |
+
},
|
| 10540 |
+
{
|
| 10541 |
+
"epoch": 0.47864341705327146,
|
| 10542 |
+
"grad_norm": 7154.123046875,
|
| 10543 |
+
"learning_rate": 0.00021763047247001837,
|
| 10544 |
+
"loss": 25.8975,
|
| 10545 |
+
"step": 1496
|
| 10546 |
+
},
|
| 10547 |
+
{
|
| 10548 |
+
"epoch": 0.47896336586146215,
|
| 10549 |
+
"grad_norm": 17986.6015625,
|
| 10550 |
+
"learning_rate": 0.00021743492057123596,
|
| 10551 |
+
"loss": 26.1484,
|
| 10552 |
+
"step": 1497
|
| 10553 |
+
},
|
| 10554 |
+
{
|
| 10555 |
+
"epoch": 0.47928331466965285,
|
| 10556 |
+
"grad_norm": 13135.7294921875,
|
| 10557 |
+
"learning_rate": 0.00021723936078428175,
|
| 10558 |
+
"loss": 27.4075,
|
| 10559 |
+
"step": 1498
|
| 10560 |
+
},
|
| 10561 |
+
{
|
| 10562 |
+
"epoch": 0.47960326347784354,
|
| 10563 |
+
"grad_norm": 39999.80859375,
|
| 10564 |
+
"learning_rate": 0.0002170437933166374,
|
| 10565 |
+
"loss": 26.3727,
|
| 10566 |
+
"step": 1499
|
| 10567 |
+
},
|
| 10568 |
+
{
|
| 10569 |
+
"epoch": 0.47992321228603424,
|
| 10570 |
+
"grad_norm": 6688.4423828125,
|
| 10571 |
+
"learning_rate": 0.00021684821837579245,
|
| 10572 |
+
"loss": 26.3504,
|
| 10573 |
+
"step": 1500
|
| 10574 |
+
},
|
| 10575 |
+
{
|
| 10576 |
+
"epoch": 0.48024316109422494,
|
| 10577 |
+
"grad_norm": 20799.302734375,
|
| 10578 |
+
"learning_rate": 0.0002166526361692448,
|
| 10579 |
+
"loss": 26.8786,
|
| 10580 |
+
"step": 1501
|
| 10581 |
+
},
|
| 10582 |
+
{
|
| 10583 |
+
"epoch": 0.48056310990241563,
|
| 10584 |
+
"grad_norm": 12096.689453125,
|
| 10585 |
+
"learning_rate": 0.0002164570469044997,
|
| 10586 |
+
"loss": 27.476,
|
| 10587 |
+
"step": 1502
|
| 10588 |
+
},
|
| 10589 |
+
{
|
| 10590 |
+
"epoch": 0.4808830587106063,
|
| 10591 |
+
"grad_norm": 13212.9248046875,
|
| 10592 |
+
"learning_rate": 0.00021626145078907006,
|
| 10593 |
+
"loss": 25.8305,
|
| 10594 |
+
"step": 1503
|
| 10595 |
+
},
|
| 10596 |
+
{
|
| 10597 |
+
"epoch": 0.48120300751879697,
|
| 10598 |
+
"grad_norm": 25206.7734375,
|
| 10599 |
+
"learning_rate": 0.00021606584803047607,
|
| 10600 |
+
"loss": 27.1057,
|
| 10601 |
+
"step": 1504
|
| 10602 |
+
},
|
| 10603 |
+
{
|
| 10604 |
+
"epoch": 0.48152295632698766,
|
| 10605 |
+
"grad_norm": 9155.57421875,
|
| 10606 |
+
"learning_rate": 0.00021587023883624484,
|
| 10607 |
+
"loss": 25.6381,
|
| 10608 |
+
"step": 1505
|
| 10609 |
+
},
|
| 10610 |
+
{
|
| 10611 |
+
"epoch": 0.48184290513517836,
|
| 10612 |
+
"grad_norm": 136102.078125,
|
| 10613 |
+
"learning_rate": 0.00021567462341391043,
|
| 10614 |
+
"loss": 26.7886,
|
| 10615 |
+
"step": 1506
|
| 10616 |
+
},
|
| 10617 |
+
{
|
| 10618 |
+
"epoch": 0.48216285394336905,
|
| 10619 |
+
"grad_norm": 130571.6015625,
|
| 10620 |
+
"learning_rate": 0.00021547900197101347,
|
| 10621 |
+
"loss": 27.0289,
|
| 10622 |
+
"step": 1507
|
| 10623 |
+
},
|
| 10624 |
+
{
|
| 10625 |
+
"epoch": 0.48248280275155975,
|
| 10626 |
+
"grad_norm": 51434.0234375,
|
| 10627 |
+
"learning_rate": 0.00021528337471510094,
|
| 10628 |
+
"loss": 25.754,
|
| 10629 |
+
"step": 1508
|
| 10630 |
+
},
|
| 10631 |
+
{
|
| 10632 |
+
"epoch": 0.48280275155975044,
|
| 10633 |
+
"grad_norm": 1381.776611328125,
|
| 10634 |
+
"learning_rate": 0.00021508774185372595,
|
| 10635 |
+
"loss": 26.9629,
|
| 10636 |
+
"step": 1509
|
| 10637 |
+
},
|
| 10638 |
+
{
|
| 10639 |
+
"epoch": 0.48312270036794114,
|
| 10640 |
+
"grad_norm": 265793.59375,
|
| 10641 |
+
"learning_rate": 0.00021489210359444788,
|
| 10642 |
+
"loss": 25.9567,
|
| 10643 |
+
"step": 1510
|
| 10644 |
+
},
|
| 10645 |
+
{
|
| 10646 |
+
"epoch": 0.48344264917613183,
|
| 10647 |
+
"grad_norm": 845718.25,
|
| 10648 |
+
"learning_rate": 0.00021469646014483137,
|
| 10649 |
+
"loss": 26.3276,
|
| 10650 |
+
"step": 1511
|
| 10651 |
+
},
|
| 10652 |
+
{
|
| 10653 |
+
"epoch": 0.48376259798432253,
|
| 10654 |
+
"grad_norm": 225879.328125,
|
| 10655 |
+
"learning_rate": 0.0002145008117124467,
|
| 10656 |
+
"loss": 25.9805,
|
| 10657 |
+
"step": 1512
|
| 10658 |
+
},
|
| 10659 |
+
{
|
| 10660 |
+
"epoch": 0.4840825467925132,
|
| 10661 |
+
"grad_norm": 436685.65625,
|
| 10662 |
+
"learning_rate": 0.00021430515850486974,
|
| 10663 |
+
"loss": 25.5142,
|
| 10664 |
+
"step": 1513
|
| 10665 |
+
},
|
| 10666 |
+
{
|
| 10667 |
+
"epoch": 0.48440249560070386,
|
| 10668 |
+
"grad_norm": 14073.525390625,
|
| 10669 |
+
"learning_rate": 0.00021410950072968111,
|
| 10670 |
+
"loss": 26.5983,
|
| 10671 |
+
"step": 1514
|
| 10672 |
+
},
|
| 10673 |
+
{
|
| 10674 |
+
"epoch": 0.48472244440889456,
|
| 10675 |
+
"grad_norm": 28912.51953125,
|
| 10676 |
+
"learning_rate": 0.00021391383859446634,
|
| 10677 |
+
"loss": 26.5296,
|
| 10678 |
+
"step": 1515
|
| 10679 |
+
},
|
| 10680 |
+
{
|
| 10681 |
+
"epoch": 0.48504239321708525,
|
| 10682 |
+
"grad_norm": 229632.765625,
|
| 10683 |
+
"learning_rate": 0.0002137181723068157,
|
| 10684 |
+
"loss": 26.8914,
|
| 10685 |
+
"step": 1516
|
| 10686 |
+
},
|
| 10687 |
+
{
|
| 10688 |
+
"epoch": 0.48536234202527595,
|
| 10689 |
+
"grad_norm": 4752278.5,
|
| 10690 |
+
"learning_rate": 0.00021352250207432366,
|
| 10691 |
+
"loss": 26.7629,
|
| 10692 |
+
"step": 1517
|
| 10693 |
+
},
|
| 10694 |
+
{
|
| 10695 |
+
"epoch": 0.48568229083346665,
|
| 10696 |
+
"grad_norm": 16323.1162109375,
|
| 10697 |
+
"learning_rate": 0.00021332682810458912,
|
| 10698 |
+
"loss": 26.3109,
|
| 10699 |
+
"step": 1518
|
| 10700 |
+
},
|
| 10701 |
+
{
|
| 10702 |
+
"epoch": 0.48600223964165734,
|
| 10703 |
+
"grad_norm": 81523.640625,
|
| 10704 |
+
"learning_rate": 0.00021313115060521472,
|
| 10705 |
+
"loss": 26.0956,
|
| 10706 |
+
"step": 1519
|
| 10707 |
+
},
|
| 10708 |
+
{
|
| 10709 |
+
"epoch": 0.48632218844984804,
|
| 10710 |
+
"grad_norm": 651489.125,
|
| 10711 |
+
"learning_rate": 0.000212935469783807,
|
| 10712 |
+
"loss": 23.9349,
|
| 10713 |
+
"step": 1520
|
| 10714 |
+
},
|
| 10715 |
+
{
|
| 10716 |
+
"epoch": 0.48664213725803873,
|
| 10717 |
+
"grad_norm": 111789.7890625,
|
| 10718 |
+
"learning_rate": 0.00021273978584797595,
|
| 10719 |
+
"loss": 26.5997,
|
| 10720 |
+
"step": 1521
|
| 10721 |
+
},
|
| 10722 |
+
{
|
| 10723 |
+
"epoch": 0.4869620860662294,
|
| 10724 |
+
"grad_norm": 445851.625,
|
| 10725 |
+
"learning_rate": 0.00021254409900533494,
|
| 10726 |
+
"loss": 25.0127,
|
| 10727 |
+
"step": 1522
|
| 10728 |
+
},
|
| 10729 |
+
{
|
| 10730 |
+
"epoch": 0.48728203487442007,
|
| 10731 |
+
"grad_norm": 8311.8564453125,
|
| 10732 |
+
"learning_rate": 0.00021234840946350025,
|
| 10733 |
+
"loss": 25.4959,
|
| 10734 |
+
"step": 1523
|
| 10735 |
+
},
|
| 10736 |
+
{
|
| 10737 |
+
"epoch": 0.48760198368261076,
|
| 10738 |
+
"grad_norm": 19274.490234375,
|
| 10739 |
+
"learning_rate": 0.00021215271743009128,
|
| 10740 |
+
"loss": 25.6081,
|
| 10741 |
+
"step": 1524
|
| 10742 |
+
},
|
| 10743 |
+
{
|
| 10744 |
+
"epoch": 0.48792193249080146,
|
| 10745 |
+
"grad_norm": 4866.87109375,
|
| 10746 |
+
"learning_rate": 0.00021195702311272991,
|
| 10747 |
+
"loss": 25.6694,
|
| 10748 |
+
"step": 1525
|
| 10749 |
+
},
|
| 10750 |
+
{
|
| 10751 |
+
"epoch": 0.48824188129899215,
|
| 10752 |
+
"grad_norm": 75604.7421875,
|
| 10753 |
+
"learning_rate": 0.0002117613267190403,
|
| 10754 |
+
"loss": 25.3733,
|
| 10755 |
+
"step": 1526
|
| 10756 |
+
},
|
| 10757 |
+
{
|
| 10758 |
+
"epoch": 0.48856183010718285,
|
| 10759 |
+
"grad_norm": 223031.828125,
|
| 10760 |
+
"learning_rate": 0.00021156562845664917,
|
| 10761 |
+
"loss": 25.7078,
|
| 10762 |
+
"step": 1527
|
| 10763 |
+
},
|
| 10764 |
+
{
|
| 10765 |
+
"epoch": 0.48888177891537354,
|
| 10766 |
+
"grad_norm": 186668.125,
|
| 10767 |
+
"learning_rate": 0.00021136992853318503,
|
| 10768 |
+
"loss": 24.8685,
|
| 10769 |
+
"step": 1528
|
| 10770 |
+
},
|
| 10771 |
+
{
|
| 10772 |
+
"epoch": 0.48920172772356424,
|
| 10773 |
+
"grad_norm": 10703.2900390625,
|
| 10774 |
+
"learning_rate": 0.00021117422715627812,
|
| 10775 |
+
"loss": 25.8425,
|
| 10776 |
+
"step": 1529
|
| 10777 |
+
},
|
| 10778 |
+
{
|
| 10779 |
+
"epoch": 0.48952167653175493,
|
| 10780 |
+
"grad_norm": 112749.9765625,
|
| 10781 |
+
"learning_rate": 0.00021097852453356018,
|
| 10782 |
+
"loss": 25.8862,
|
| 10783 |
+
"step": 1530
|
| 10784 |
+
},
|
| 10785 |
+
{
|
| 10786 |
+
"epoch": 0.48984162533994563,
|
| 10787 |
+
"grad_norm": 4226.068359375,
|
| 10788 |
+
"learning_rate": 0.0002107828208726644,
|
| 10789 |
+
"loss": 24.3479,
|
| 10790 |
+
"step": 1531
|
| 10791 |
+
},
|
| 10792 |
+
{
|
| 10793 |
+
"epoch": 0.4901615741481363,
|
| 10794 |
+
"grad_norm": 18113.078125,
|
| 10795 |
+
"learning_rate": 0.0002105871163812251,
|
| 10796 |
+
"loss": 26.1326,
|
| 10797 |
+
"step": 1532
|
| 10798 |
+
},
|
| 10799 |
+
{
|
| 10800 |
+
"epoch": 0.49048152295632697,
|
| 10801 |
+
"grad_norm": 48981.3203125,
|
| 10802 |
+
"learning_rate": 0.0002103914112668774,
|
| 10803 |
+
"loss": 24.9588,
|
| 10804 |
+
"step": 1533
|
| 10805 |
+
},
|
| 10806 |
+
{
|
| 10807 |
+
"epoch": 0.49080147176451766,
|
| 10808 |
+
"grad_norm": 295377.0625,
|
| 10809 |
+
"learning_rate": 0.00021019570573725687,
|
| 10810 |
+
"loss": 26.1572,
|
| 10811 |
+
"step": 1534
|
| 10812 |
+
},
|
| 10813 |
+
{
|
| 10814 |
+
"epoch": 0.49112142057270836,
|
| 10815 |
+
"grad_norm": 3137.26806640625,
|
| 10816 |
+
"learning_rate": 0.00021,
|
| 10817 |
+
"loss": 25.481,
|
| 10818 |
+
"step": 1535
|
| 10819 |
+
},
|
| 10820 |
+
{
|
| 10821 |
+
"epoch": 0.49144136938089905,
|
| 10822 |
+
"grad_norm": 5773.8046875,
|
| 10823 |
+
"learning_rate": 0.00020980429426274312,
|
| 10824 |
+
"loss": 25.5111,
|
| 10825 |
+
"step": 1536
|
| 10826 |
+
},
|
| 10827 |
+
{
|
| 10828 |
+
"epoch": 0.49176131818908975,
|
| 10829 |
+
"grad_norm": 813.1864013671875,
|
| 10830 |
+
"learning_rate": 0.00020960858873312268,
|
| 10831 |
+
"loss": 24.7992,
|
| 10832 |
+
"step": 1537
|
| 10833 |
+
},
|
| 10834 |
+
{
|
| 10835 |
+
"epoch": 0.49208126699728044,
|
| 10836 |
+
"grad_norm": 91637.9609375,
|
| 10837 |
+
"learning_rate": 0.00020941288361877493,
|
| 10838 |
+
"loss": 25.2568,
|
| 10839 |
+
"step": 1538
|
| 10840 |
+
},
|
| 10841 |
+
{
|
| 10842 |
+
"epoch": 0.49240121580547114,
|
| 10843 |
+
"grad_norm": 295463.03125,
|
| 10844 |
+
"learning_rate": 0.0002092171791273356,
|
| 10845 |
+
"loss": 25.5154,
|
| 10846 |
+
"step": 1539
|
| 10847 |
+
},
|
| 10848 |
+
{
|
| 10849 |
+
"epoch": 0.49272116461366183,
|
| 10850 |
+
"grad_norm": 172231.9375,
|
| 10851 |
+
"learning_rate": 0.00020902147546643986,
|
| 10852 |
+
"loss": 25.6833,
|
| 10853 |
+
"step": 1540
|
| 10854 |
+
},
|
| 10855 |
+
{
|
| 10856 |
+
"epoch": 0.49304111342185253,
|
| 10857 |
+
"grad_norm": 13051.9150390625,
|
| 10858 |
+
"learning_rate": 0.0002088257728437219,
|
| 10859 |
+
"loss": 24.4191,
|
| 10860 |
+
"step": 1541
|
| 10861 |
+
},
|
| 10862 |
+
{
|
| 10863 |
+
"epoch": 0.49336106223004317,
|
| 10864 |
+
"grad_norm": 2055.154296875,
|
| 10865 |
+
"learning_rate": 0.000208630071466815,
|
| 10866 |
+
"loss": 25.3225,
|
| 10867 |
+
"step": 1542
|
| 10868 |
+
},
|
| 10869 |
+
{
|
| 10870 |
+
"epoch": 0.49368101103823386,
|
| 10871 |
+
"grad_norm": 91596.3203125,
|
| 10872 |
+
"learning_rate": 0.00020843437154335082,
|
| 10873 |
+
"loss": 24.9479,
|
| 10874 |
+
"step": 1543
|
| 10875 |
+
},
|
| 10876 |
+
{
|
| 10877 |
+
"epoch": 0.49400095984642456,
|
| 10878 |
+
"grad_norm": 4480.8583984375,
|
| 10879 |
+
"learning_rate": 0.00020823867328095968,
|
| 10880 |
+
"loss": 25.0226,
|
| 10881 |
+
"step": 1544
|
| 10882 |
+
},
|
| 10883 |
+
{
|
| 10884 |
+
"epoch": 0.49432090865461525,
|
| 10885 |
+
"grad_norm": 417.2145080566406,
|
| 10886 |
+
"learning_rate": 0.0002080429768872702,
|
| 10887 |
+
"loss": 24.4093,
|
| 10888 |
+
"step": 1545
|
| 10889 |
+
},
|
| 10890 |
+
{
|
| 10891 |
+
"epoch": 0.49464085746280595,
|
| 10892 |
+
"grad_norm": 1596.4095458984375,
|
| 10893 |
+
"learning_rate": 0.00020784728256990876,
|
| 10894 |
+
"loss": 25.3089,
|
| 10895 |
+
"step": 1546
|
| 10896 |
+
},
|
| 10897 |
+
{
|
| 10898 |
+
"epoch": 0.49496080627099664,
|
| 10899 |
+
"grad_norm": 66.6065444946289,
|
| 10900 |
+
"learning_rate": 0.00020765159053649974,
|
| 10901 |
+
"loss": 24.7633,
|
| 10902 |
+
"step": 1547
|
| 10903 |
+
},
|
| 10904 |
+
{
|
| 10905 |
+
"epoch": 0.49528075507918734,
|
| 10906 |
+
"grad_norm": 168.79971313476562,
|
| 10907 |
+
"learning_rate": 0.00020745590099466513,
|
| 10908 |
+
"loss": 23.7519,
|
| 10909 |
+
"step": 1548
|
| 10910 |
+
},
|
| 10911 |
+
{
|
| 10912 |
+
"epoch": 0.49560070388737804,
|
| 10913 |
+
"grad_norm": 3104.95068359375,
|
| 10914 |
+
"learning_rate": 0.00020726021415202407,
|
| 10915 |
+
"loss": 23.3702,
|
| 10916 |
+
"step": 1549
|
| 10917 |
+
},
|
| 10918 |
+
{
|
| 10919 |
+
"epoch": 0.49592065269556873,
|
| 10920 |
+
"grad_norm": 2233.52978515625,
|
| 10921 |
+
"learning_rate": 0.00020706453021619302,
|
| 10922 |
+
"loss": 21.5469,
|
| 10923 |
+
"step": 1550
|
| 10924 |
+
},
|
| 10925 |
+
{
|
| 10926 |
+
"epoch": 0.49624060150375937,
|
| 10927 |
+
"grad_norm": 2162.917236328125,
|
| 10928 |
+
"learning_rate": 0.00020686884939478533,
|
| 10929 |
+
"loss": 20.991,
|
| 10930 |
+
"step": 1551
|
| 10931 |
+
},
|
| 10932 |
+
{
|
| 10933 |
+
"epoch": 0.49656055031195007,
|
| 10934 |
+
"grad_norm": 55.1094970703125,
|
| 10935 |
+
"learning_rate": 0.0002066731718954109,
|
| 10936 |
+
"loss": 18.9229,
|
| 10937 |
+
"step": 1552
|
| 10938 |
+
},
|
| 10939 |
+
{
|
| 10940 |
+
"epoch": 0.49688049912014076,
|
| 10941 |
+
"grad_norm": 92.81909942626953,
|
| 10942 |
+
"learning_rate": 0.00020647749792567635,
|
| 10943 |
+
"loss": 19.7671,
|
| 10944 |
+
"step": 1553
|
| 10945 |
+
},
|
| 10946 |
+
{
|
| 10947 |
+
"epoch": 0.49720044792833146,
|
| 10948 |
+
"grad_norm": 364.9884338378906,
|
| 10949 |
+
"learning_rate": 0.00020628182769318434,
|
| 10950 |
+
"loss": 16.8552,
|
| 10951 |
+
"step": 1554
|
| 10952 |
+
},
|
| 10953 |
+
{
|
| 10954 |
+
"epoch": 0.49752039673652215,
|
| 10955 |
+
"grad_norm": 394.3650817871094,
|
| 10956 |
+
"learning_rate": 0.00020608616140553365,
|
| 10957 |
+
"loss": 16.8443,
|
| 10958 |
+
"step": 1555
|
| 10959 |
+
},
|
| 10960 |
+
{
|
| 10961 |
+
"epoch": 0.49784034554471285,
|
| 10962 |
+
"grad_norm": 948.4185180664062,
|
| 10963 |
+
"learning_rate": 0.00020589049927031896,
|
| 10964 |
+
"loss": 15.1372,
|
| 10965 |
+
"step": 1556
|
| 10966 |
+
},
|
| 10967 |
+
{
|
| 10968 |
+
"epoch": 0.49816029435290354,
|
| 10969 |
+
"grad_norm": 13.938966751098633,
|
| 10970 |
+
"learning_rate": 0.0002056948414951303,
|
| 10971 |
+
"loss": 16.3998,
|
| 10972 |
+
"step": 1557
|
| 10973 |
+
},
|
| 10974 |
+
{
|
| 10975 |
+
"epoch": 0.49848024316109424,
|
| 10976 |
+
"grad_norm": 33.71576690673828,
|
| 10977 |
+
"learning_rate": 0.0002054991882875533,
|
| 10978 |
+
"loss": 16.4799,
|
| 10979 |
+
"step": 1558
|
| 10980 |
+
},
|
| 10981 |
+
{
|
| 10982 |
+
"epoch": 0.49880019196928493,
|
| 10983 |
+
"grad_norm": 582.8843994140625,
|
| 10984 |
+
"learning_rate": 0.00020530353985516876,
|
| 10985 |
+
"loss": 14.1065,
|
| 10986 |
+
"step": 1559
|
| 10987 |
+
},
|
| 10988 |
+
{
|
| 10989 |
+
"epoch": 0.49912014077747563,
|
| 10990 |
+
"grad_norm": 6479.2900390625,
|
| 10991 |
+
"learning_rate": 0.00020510789640555216,
|
| 10992 |
+
"loss": 13.1232,
|
| 10993 |
+
"step": 1560
|
| 10994 |
+
},
|
| 10995 |
+
{
|
| 10996 |
+
"epoch": 0.49944008958566627,
|
| 10997 |
+
"grad_norm": 27.09345245361328,
|
| 10998 |
+
"learning_rate": 0.000204912258146274,
|
| 10999 |
+
"loss": 13.4933,
|
| 11000 |
+
"step": 1561
|
| 11001 |
+
},
|
| 11002 |
+
{
|
| 11003 |
+
"epoch": 0.49976003839385696,
|
| 11004 |
+
"grad_norm": 38.53468704223633,
|
| 11005 |
+
"learning_rate": 0.00020471662528489913,
|
| 11006 |
+
"loss": 11.0668,
|
| 11007 |
+
"step": 1562
|
| 11008 |
+
},
|
| 11009 |
+
{
|
| 11010 |
+
"epoch": 0.5000799872020477,
|
| 11011 |
+
"grad_norm": 3335.210693359375,
|
| 11012 |
+
"learning_rate": 0.00020452099802898658,
|
| 11013 |
+
"loss": 11.4144,
|
| 11014 |
+
"step": 1563
|
| 11015 |
+
},
|
| 11016 |
+
{
|
| 11017 |
+
"epoch": 0.5003999360102384,
|
| 11018 |
+
"grad_norm": 26.711124420166016,
|
| 11019 |
+
"learning_rate": 0.00020432537658608962,
|
| 11020 |
+
"loss": 10.6639,
|
| 11021 |
+
"step": 1564
|
| 11022 |
+
},
|
| 11023 |
+
{
|
| 11024 |
+
"epoch": 0.5007198848184291,
|
| 11025 |
+
"grad_norm": 17.574525833129883,
|
| 11026 |
+
"learning_rate": 0.0002041297611637552,
|
| 11027 |
+
"loss": 9.7858,
|
| 11028 |
+
"step": 1565
|
| 11029 |
+
},
|
| 11030 |
+
{
|
| 11031 |
+
"epoch": 0.5010398336266197,
|
| 11032 |
+
"grad_norm": 64.17845916748047,
|
| 11033 |
+
"learning_rate": 0.00020393415196952392,
|
| 11034 |
+
"loss": 8.1859,
|
| 11035 |
+
"step": 1566
|
| 11036 |
+
},
|
| 11037 |
+
{
|
| 11038 |
+
"epoch": 0.5013597824348104,
|
| 11039 |
+
"grad_norm": 23.2316951751709,
|
| 11040 |
+
"learning_rate": 0.00020373854921092996,
|
| 11041 |
+
"loss": 8.0373,
|
| 11042 |
+
"step": 1567
|
| 11043 |
+
},
|
| 11044 |
+
{
|
| 11045 |
+
"epoch": 0.5016797312430011,
|
| 11046 |
+
"grad_norm": 12.098379135131836,
|
| 11047 |
+
"learning_rate": 0.00020354295309550033,
|
| 11048 |
+
"loss": 7.2053,
|
| 11049 |
+
"step": 1568
|
| 11050 |
+
},
|
| 11051 |
+
{
|
| 11052 |
+
"epoch": 0.5019996800511918,
|
| 11053 |
+
"grad_norm": 18.08388900756836,
|
| 11054 |
+
"learning_rate": 0.0002033473638307552,
|
| 11055 |
+
"loss": 7.1583,
|
| 11056 |
+
"step": 1569
|
| 11057 |
+
},
|
| 11058 |
+
{
|
| 11059 |
+
"epoch": 0.5023196288593825,
|
| 11060 |
+
"grad_norm": 24.80642318725586,
|
| 11061 |
+
"learning_rate": 0.00020315178162420757,
|
| 11062 |
+
"loss": 7.4845,
|
| 11063 |
+
"step": 1570
|
| 11064 |
+
},
|
| 11065 |
+
{
|
| 11066 |
+
"epoch": 0.5026395776675732,
|
| 11067 |
+
"grad_norm": 211.3668975830078,
|
| 11068 |
+
"learning_rate": 0.00020295620668336266,
|
| 11069 |
+
"loss": 7.5237,
|
| 11070 |
+
"step": 1571
|
| 11071 |
+
},
|
| 11072 |
+
{
|
| 11073 |
+
"epoch": 0.5029595264757639,
|
| 11074 |
+
"grad_norm": 168.52293395996094,
|
| 11075 |
+
"learning_rate": 0.00020276063921571824,
|
| 11076 |
+
"loss": 7.6717,
|
| 11077 |
+
"step": 1572
|
| 11078 |
+
},
|
| 11079 |
+
{
|
| 11080 |
+
"epoch": 0.5032794752839546,
|
| 11081 |
+
"grad_norm": 45.54515838623047,
|
| 11082 |
+
"learning_rate": 0.00020256507942876409,
|
| 11083 |
+
"loss": 7.8054,
|
| 11084 |
+
"step": 1573
|
| 11085 |
+
},
|
| 11086 |
+
{
|
| 11087 |
+
"epoch": 0.5035994240921453,
|
| 11088 |
+
"grad_norm": 218.6697998046875,
|
| 11089 |
+
"learning_rate": 0.00020236952752998168,
|
| 11090 |
+
"loss": 7.4359,
|
| 11091 |
+
"step": 1574
|
| 11092 |
+
},
|
| 11093 |
+
{
|
| 11094 |
+
"epoch": 0.503919372900336,
|
| 11095 |
+
"grad_norm": 259.8759765625,
|
| 11096 |
+
"learning_rate": 0.00020217398372684439,
|
| 11097 |
+
"loss": 7.7714,
|
| 11098 |
+
"step": 1575
|
| 11099 |
+
},
|
| 11100 |
+
{
|
| 11101 |
+
"epoch": 0.5042393217085266,
|
| 11102 |
+
"grad_norm": 2446.431396484375,
|
| 11103 |
+
"learning_rate": 0.00020197844822681673,
|
| 11104 |
+
"loss": 7.4991,
|
| 11105 |
+
"step": 1576
|
| 11106 |
+
},
|
| 11107 |
+
{
|
| 11108 |
+
"epoch": 0.5045592705167173,
|
| 11109 |
+
"grad_norm": 38.335880279541016,
|
| 11110 |
+
"learning_rate": 0.00020178292123735454,
|
| 11111 |
+
"loss": 7.4279,
|
| 11112 |
+
"step": 1577
|
| 11113 |
+
},
|
| 11114 |
+
{
|
| 11115 |
+
"epoch": 0.504879219324908,
|
| 11116 |
+
"grad_norm": 64.94971466064453,
|
| 11117 |
+
"learning_rate": 0.00020158740296590474,
|
| 11118 |
+
"loss": 7.4041,
|
| 11119 |
+
"step": 1578
|
| 11120 |
+
},
|
| 11121 |
+
{
|
| 11122 |
+
"epoch": 0.5051991681330987,
|
| 11123 |
+
"grad_norm": 106.72026824951172,
|
| 11124 |
+
"learning_rate": 0.00020139189361990476,
|
| 11125 |
+
"loss": 7.404,
|
| 11126 |
+
"step": 1579
|
| 11127 |
+
},
|
| 11128 |
+
{
|
| 11129 |
+
"epoch": 0.5055191169412894,
|
| 11130 |
+
"grad_norm": 183.95343017578125,
|
| 11131 |
+
"learning_rate": 0.00020119639340678274,
|
| 11132 |
+
"loss": 7.0569,
|
| 11133 |
+
"step": 1580
|
| 11134 |
+
},
|
| 11135 |
+
{
|
| 11136 |
+
"epoch": 0.5058390657494801,
|
| 11137 |
+
"grad_norm": 561.7801513671875,
|
| 11138 |
+
"learning_rate": 0.0002010009025339571,
|
| 11139 |
+
"loss": 7.1187,
|
| 11140 |
+
"step": 1581
|
| 11141 |
+
},
|
| 11142 |
+
{
|
| 11143 |
+
"epoch": 0.5061590145576708,
|
| 11144 |
+
"grad_norm": 766.6022338867188,
|
| 11145 |
+
"learning_rate": 0.0002008054212088362,
|
| 11146 |
+
"loss": 7.0652,
|
| 11147 |
+
"step": 1582
|
| 11148 |
+
},
|
| 11149 |
+
{
|
| 11150 |
+
"epoch": 0.5064789633658615,
|
| 11151 |
+
"grad_norm": 2744.870361328125,
|
| 11152 |
+
"learning_rate": 0.0002006099496388185,
|
| 11153 |
+
"loss": 6.8775,
|
| 11154 |
+
"step": 1583
|
| 11155 |
+
},
|
| 11156 |
+
{
|
| 11157 |
+
"epoch": 0.5067989121740522,
|
| 11158 |
+
"grad_norm": 38.441341400146484,
|
| 11159 |
+
"learning_rate": 0.00020041448803129205,
|
| 11160 |
+
"loss": 7.1471,
|
| 11161 |
+
"step": 1584
|
| 11162 |
+
},
|
| 11163 |
+
{
|
| 11164 |
+
"epoch": 0.5071188609822428,
|
| 11165 |
+
"grad_norm": 338.59832763671875,
|
| 11166 |
+
"learning_rate": 0.00020021903659363414,
|
| 11167 |
+
"loss": 6.8759,
|
| 11168 |
+
"step": 1585
|
| 11169 |
+
},
|
| 11170 |
+
{
|
| 11171 |
+
"epoch": 0.5074388097904335,
|
| 11172 |
+
"grad_norm": 12.734524726867676,
|
| 11173 |
+
"learning_rate": 0.00020002359553321158,
|
| 11174 |
+
"loss": 7.1117,
|
| 11175 |
+
"step": 1586
|
| 11176 |
+
},
|
| 11177 |
+
{
|
| 11178 |
+
"epoch": 0.5077587585986242,
|
| 11179 |
+
"grad_norm": 10831.9697265625,
|
| 11180 |
+
"learning_rate": 0.00019982816505737978,
|
| 11181 |
+
"loss": 7.1011,
|
| 11182 |
+
"step": 1587
|
| 11183 |
+
},
|
| 11184 |
+
{
|
| 11185 |
+
"epoch": 0.5080787074068149,
|
| 11186 |
+
"grad_norm": 6180.69482421875,
|
| 11187 |
+
"learning_rate": 0.00019963274537348327,
|
| 11188 |
+
"loss": 6.7045,
|
| 11189 |
+
"step": 1588
|
| 11190 |
+
},
|
| 11191 |
+
{
|
| 11192 |
+
"epoch": 0.5083986562150056,
|
| 11193 |
+
"grad_norm": 75.3614730834961,
|
| 11194 |
+
"learning_rate": 0.0001994373366888551,
|
| 11195 |
+
"loss": 6.8817,
|
| 11196 |
+
"step": 1589
|
| 11197 |
+
},
|
| 11198 |
+
{
|
| 11199 |
+
"epoch": 0.5087186050231963,
|
| 11200 |
+
"grad_norm": 21362.6875,
|
| 11201 |
+
"learning_rate": 0.0001992419392108164,
|
| 11202 |
+
"loss": 6.5996,
|
| 11203 |
+
"step": 1590
|
| 11204 |
+
},
|
| 11205 |
+
{
|
| 11206 |
+
"epoch": 0.509038553831387,
|
| 11207 |
+
"grad_norm": 40.761505126953125,
|
| 11208 |
+
"learning_rate": 0.00019904655314667663,
|
| 11209 |
+
"loss": 6.6685,
|
| 11210 |
+
"step": 1591
|
| 11211 |
+
},
|
| 11212 |
+
{
|
| 11213 |
+
"epoch": 0.5093585026395777,
|
| 11214 |
+
"grad_norm": 61433.0625,
|
| 11215 |
+
"learning_rate": 0.0001988511787037331,
|
| 11216 |
+
"loss": 6.6834,
|
| 11217 |
+
"step": 1592
|
| 11218 |
+
},
|
| 11219 |
+
{
|
| 11220 |
+
"epoch": 0.5096784514477684,
|
| 11221 |
+
"grad_norm": 76.52178192138672,
|
| 11222 |
+
"learning_rate": 0.00019865581608927068,
|
| 11223 |
+
"loss": 6.7812,
|
| 11224 |
+
"step": 1593
|
| 11225 |
+
},
|
| 11226 |
+
{
|
| 11227 |
+
"epoch": 0.509998400255959,
|
| 11228 |
+
"grad_norm": 11.229225158691406,
|
| 11229 |
+
"learning_rate": 0.00019846046551056187,
|
| 11230 |
+
"loss": 6.7718,
|
| 11231 |
+
"step": 1594
|
| 11232 |
+
},
|
| 11233 |
+
{
|
| 11234 |
+
"epoch": 0.5103183490641497,
|
| 11235 |
+
"grad_norm": 56.32878112792969,
|
| 11236 |
+
"learning_rate": 0.00019826512717486625,
|
| 11237 |
+
"loss": 6.5842,
|
| 11238 |
+
"step": 1595
|
| 11239 |
+
},
|
| 11240 |
+
{
|
| 11241 |
+
"epoch": 0.5106382978723404,
|
| 11242 |
+
"grad_norm": 42.66135025024414,
|
| 11243 |
+
"learning_rate": 0.00019806980128943047,
|
| 11244 |
+
"loss": 6.5896,
|
| 11245 |
+
"step": 1596
|
| 11246 |
+
},
|
| 11247 |
+
{
|
| 11248 |
+
"epoch": 0.5109582466805311,
|
| 11249 |
+
"grad_norm": 115.71044158935547,
|
| 11250 |
+
"learning_rate": 0.00019787448806148802,
|
| 11251 |
+
"loss": 6.5618,
|
| 11252 |
+
"step": 1597
|
| 11253 |
+
},
|
| 11254 |
+
{
|
| 11255 |
+
"epoch": 0.5112781954887218,
|
| 11256 |
+
"grad_norm": 42.69276428222656,
|
| 11257 |
+
"learning_rate": 0.00019767918769825874,
|
| 11258 |
+
"loss": 6.6004,
|
| 11259 |
+
"step": 1598
|
| 11260 |
+
},
|
| 11261 |
+
{
|
| 11262 |
+
"epoch": 0.5115981442969125,
|
| 11263 |
+
"grad_norm": 238.4598846435547,
|
| 11264 |
+
"learning_rate": 0.0001974839004069492,
|
| 11265 |
+
"loss": 6.6235,
|
| 11266 |
+
"step": 1599
|
| 11267 |
+
},
|
| 11268 |
+
{
|
| 11269 |
+
"epoch": 0.5119180931051032,
|
| 11270 |
+
"grad_norm": 34.8486213684082,
|
| 11271 |
+
"learning_rate": 0.00019728862639475182,
|
| 11272 |
+
"loss": 6.7821,
|
| 11273 |
+
"step": 1600
|
| 11274 |
+
},
|
| 11275 |
+
{
|
| 11276 |
+
"epoch": 0.5119180931051032,
|
| 11277 |
+
"eval_loss": 3.3196933269500732,
|
| 11278 |
+
"eval_runtime": 233.9804,
|
| 11279 |
+
"eval_samples_per_second": 5.624,
|
| 11280 |
+
"eval_steps_per_second": 1.406,
|
| 11281 |
+
"step": 1600
|
| 11282 |
}
|
| 11283 |
],
|
| 11284 |
"logging_steps": 1,
|
|
|
|
| 11293 |
"early_stopping_threshold": 0.0
|
| 11294 |
},
|
| 11295 |
"attributes": {
|
| 11296 |
+
"early_stopping_patience_counter": 5
|
| 11297 |
}
|
| 11298 |
},
|
| 11299 |
"TrainerControl": {
|
|
|
|
| 11307 |
"attributes": {}
|
| 11308 |
}
|
| 11309 |
},
|
| 11310 |
+
"total_flos": 1.480009690795475e+18,
|
| 11311 |
"train_batch_size": 4,
|
| 11312 |
"trial_name": null,
|
| 11313 |
"trial_params": null
|