Training in progress, step 2000, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:612b91cd8b042500cbc122150baa555e6dedebb7a686b92d9cfc441c50b9dac1
 size 147770496
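adapter_model.safetensors is the file name PEFT uses for LoRA-style adapter weights, so this checkpoint most likely stores an adapter rather than full model weights. A minimal sketch of attaching such an adapter to its base model, assuming the peft library and a hypothetical base-model id (the actual base model is not named in this commit):

    from transformers import AutoModelForCausalLM
    from peft import PeftModel

    # Hypothetical base model id; the real base model is not recorded in this diff.
    base = AutoModelForCausalLM.from_pretrained("example-org/base-model")

    # Attach the adapter weights from the checkpoint directory
    # (the directory holding adapter_model.safetensors and its adapter_config.json).
    model = PeftModel.from_pretrained(base, "last-checkpoint")
    model.eval()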
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5c811480dcfed04064dad5883988addf3398e7c9dda053ca53b380592e9d28e4
 size 75472244
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0cd29e4d93951254b33246b52f5e18c95727b99c72fecd9adbb8b1e8c8261b74
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:03405f993da5fc8a5702fc88093304314731eeea62085fc7abc5f037e616665e
 size 1064
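optimizer.pt, scheduler.pt and rng_state.pth carry the optimizer, learning-rate scheduler and RNG state needed to continue the run deterministically, and trainer_state.json (below) records where it stopped. A minimal sketch of resuming with the Hugging Face Trainer, assuming a model and datasets already exist; the placeholder arguments are not taken from this repository:

    from transformers import Trainer, TrainingArguments

    def resume_from_last_checkpoint(model, train_ds, eval_ds):
        """Continue the run from the uploaded last-checkpoint directory."""
        args = TrainingArguments(
            output_dir="miner_id_24",        # matches the best_model_checkpoint parent dir
            per_device_train_batch_size=4,   # matches "train_batch_size": 4 below
            logging_steps=1,                 # matches "logging_steps": 1 below
        )
        trainer = Trainer(model=model, args=args,
                          train_dataset=train_ds, eval_dataset=eval_ds)
        # resume_from_checkpoint reloads optimizer.pt, scheduler.pt, rng_state.pth
        # and trainer_state.json, so training continues from global_step 2000.
        return trainer.train(resume_from_checkpoint="last-checkpoint")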
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric":
-  "best_model_checkpoint": "miner_id_24/checkpoint-
-  "epoch": 0.
+  "best_metric": 2.5830555387074128e-05,
+  "best_model_checkpoint": "miner_id_24/checkpoint-2000",
+  "epoch": 0.8204286739821557,
   "eval_steps": 200,
-  "global_step":
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12687,6 +12687,1414 @@
   "eval_samples_per_second": 17.551,
   "eval_steps_per_second": 4.388,
   "step": 1800
| 12690 |
+
},
|
| 12691 |
+
{
|
| 12692 |
+
"epoch": 0.7387960209209312,
|
| 12693 |
+
"grad_norm": 6.24289532424882e-05,
|
| 12694 |
+
"learning_rate": 0.00017219722319487693,
|
| 12695 |
+
"loss": 0.0,
|
| 12696 |
+
"step": 1801
|
| 12697 |
+
},
|
| 12698 |
+
{
|
| 12699 |
+
"epoch": 0.7392062352579223,
|
| 12700 |
+
"grad_norm": 6.859466520836577e-05,
|
| 12701 |
+
"learning_rate": 0.00017216736154912138,
|
| 12702 |
+
"loss": 0.0,
|
| 12703 |
+
"step": 1802
|
| 12704 |
+
},
|
| 12705 |
+
{
|
| 12706 |
+
"epoch": 0.7396164495949133,
|
| 12707 |
+
"grad_norm": 3.647254561656155e-05,
|
| 12708 |
+
"learning_rate": 0.00017213748646772124,
|
| 12709 |
+
"loss": 0.0,
|
| 12710 |
+
"step": 1803
|
| 12711 |
+
},
|
| 12712 |
+
{
|
| 12713 |
+
"epoch": 0.7400266639319044,
|
| 12714 |
+
"grad_norm": 0.0010235728695988655,
|
| 12715 |
+
"learning_rate": 0.0001721075979562386,
|
| 12716 |
+
"loss": 0.0,
|
| 12717 |
+
"step": 1804
|
| 12718 |
+
},
|
| 12719 |
+
{
|
| 12720 |
+
"epoch": 0.7404368782688955,
|
| 12721 |
+
"grad_norm": 5.68695213587489e-05,
|
| 12722 |
+
"learning_rate": 0.0001720776960202378,
|
| 12723 |
+
"loss": 0.0,
|
| 12724 |
+
"step": 1805
|
| 12725 |
+
},
|
| 12726 |
+
{
|
| 12727 |
+
"epoch": 0.7408470926058865,
|
| 12728 |
+
"grad_norm": 0.003287101862952113,
|
| 12729 |
+
"learning_rate": 0.00017204778066528588,
|
| 12730 |
+
"loss": 0.0001,
|
| 12731 |
+
"step": 1806
|
| 12732 |
+
},
|
| 12733 |
+
{
|
| 12734 |
+
"epoch": 0.7412573069428776,
|
| 12735 |
+
"grad_norm": 0.0002474411448929459,
|
| 12736 |
+
"learning_rate": 0.0001720178518969522,
|
| 12737 |
+
"loss": 0.0,
|
| 12738 |
+
"step": 1807
|
| 12739 |
+
},
|
| 12740 |
+
{
|
| 12741 |
+
"epoch": 0.7416675212798687,
|
| 12742 |
+
"grad_norm": 7.206134614534676e-05,
|
| 12743 |
+
"learning_rate": 0.00017198790972080877,
|
| 12744 |
+
"loss": 0.0,
|
| 12745 |
+
"step": 1808
|
| 12746 |
+
},
|
| 12747 |
+
{
|
| 12748 |
+
"epoch": 0.7420777356168599,
|
| 12749 |
+
"grad_norm": 0.00015744745905976743,
|
| 12750 |
+
"learning_rate": 0.00017195795414242998,
|
| 12751 |
+
"loss": 0.0,
|
| 12752 |
+
"step": 1809
|
| 12753 |
+
},
|
| 12754 |
+
{
|
| 12755 |
+
"epoch": 0.7424879499538509,
|
| 12756 |
+
"grad_norm": 9.252997551811859e-05,
|
| 12757 |
+
"learning_rate": 0.00017192798516739278,
|
| 12758 |
+
"loss": 0.0,
|
| 12759 |
+
"step": 1810
|
| 12760 |
+
},
|
| 12761 |
+
{
|
| 12762 |
+
"epoch": 0.742898164290842,
|
| 12763 |
+
"grad_norm": 7.837930752430111e-05,
|
| 12764 |
+
"learning_rate": 0.00017189800280127663,
|
| 12765 |
+
"loss": 0.0,
|
| 12766 |
+
"step": 1811
|
| 12767 |
+
},
|
| 12768 |
+
{
|
| 12769 |
+
"epoch": 0.743308378627833,
|
| 12770 |
+
"grad_norm": 0.00020196700643282384,
|
| 12771 |
+
"learning_rate": 0.00017186800704966336,
|
| 12772 |
+
"loss": 0.0,
|
| 12773 |
+
"step": 1812
|
| 12774 |
+
},
|
| 12775 |
+
{
|
| 12776 |
+
"epoch": 0.7437185929648241,
|
| 12777 |
+
"grad_norm": 9.818665421335027e-05,
|
| 12778 |
+
"learning_rate": 0.00017183799791813746,
|
| 12779 |
+
"loss": 0.0,
|
| 12780 |
+
"step": 1813
|
| 12781 |
+
},
|
| 12782 |
+
{
|
| 12783 |
+
"epoch": 0.7441288073018152,
|
| 12784 |
+
"grad_norm": 0.006354078650474548,
|
| 12785 |
+
"learning_rate": 0.0001718079754122858,
|
| 12786 |
+
"loss": 0.0002,
|
| 12787 |
+
"step": 1814
|
| 12788 |
+
},
|
| 12789 |
+
{
|
| 12790 |
+
"epoch": 0.7445390216388063,
|
| 12791 |
+
"grad_norm": 0.0002685587387531996,
|
| 12792 |
+
"learning_rate": 0.0001717779395376978,
|
| 12793 |
+
"loss": 0.0,
|
| 12794 |
+
"step": 1815
|
| 12795 |
+
},
|
| 12796 |
+
{
|
| 12797 |
+
"epoch": 0.7449492359757973,
|
| 12798 |
+
"grad_norm": 0.0006759017705917358,
|
| 12799 |
+
"learning_rate": 0.00017174789029996532,
|
| 12800 |
+
"loss": 0.0,
|
| 12801 |
+
"step": 1816
|
| 12802 |
+
},
|
| 12803 |
+
{
|
| 12804 |
+
"epoch": 0.7453594503127884,
|
| 12805 |
+
"grad_norm": 0.0003719718079082668,
|
| 12806 |
+
"learning_rate": 0.00017171782770468276,
|
| 12807 |
+
"loss": 0.0,
|
| 12808 |
+
"step": 1817
|
| 12809 |
+
},
|
| 12810 |
+
{
|
| 12811 |
+
"epoch": 0.7457696646497796,
|
| 12812 |
+
"grad_norm": 0.0017665522173047066,
|
| 12813 |
+
"learning_rate": 0.00017168775175744689,
|
| 12814 |
+
"loss": 0.0001,
|
| 12815 |
+
"step": 1818
|
| 12816 |
+
},
|
| 12817 |
+
{
|
| 12818 |
+
"epoch": 0.7461798789867706,
|
| 12819 |
+
"grad_norm": 0.0005688764504157007,
|
| 12820 |
+
"learning_rate": 0.00017165766246385713,
|
| 12821 |
+
"loss": 0.0,
|
| 12822 |
+
"step": 1819
|
| 12823 |
+
},
|
| 12824 |
+
{
|
| 12825 |
+
"epoch": 0.7465900933237617,
|
| 12826 |
+
"grad_norm": 0.000330442184349522,
|
| 12827 |
+
"learning_rate": 0.0001716275598295153,
|
| 12828 |
+
"loss": 0.0,
|
| 12829 |
+
"step": 1820
|
| 12830 |
+
},
|
| 12831 |
+
{
|
| 12832 |
+
"epoch": 0.7470003076607528,
|
| 12833 |
+
"grad_norm": 0.00011137500405311584,
|
| 12834 |
+
"learning_rate": 0.0001715974438600257,
|
| 12835 |
+
"loss": 0.0,
|
| 12836 |
+
"step": 1821
|
| 12837 |
+
},
|
| 12838 |
+
{
|
| 12839 |
+
"epoch": 0.7474105219977438,
|
| 12840 |
+
"grad_norm": 0.0054165697656571865,
|
| 12841 |
+
"learning_rate": 0.0001715673145609951,
|
| 12842 |
+
"loss": 0.0001,
|
| 12843 |
+
"step": 1822
|
| 12844 |
+
},
|
| 12845 |
+
{
|
| 12846 |
+
"epoch": 0.7478207363347349,
|
| 12847 |
+
"grad_norm": 0.00012272020103409886,
|
| 12848 |
+
"learning_rate": 0.00017153717193803283,
|
| 12849 |
+
"loss": 0.0,
|
| 12850 |
+
"step": 1823
|
| 12851 |
+
},
|
| 12852 |
+
{
|
| 12853 |
+
"epoch": 0.748230950671726,
|
| 12854 |
+
"grad_norm": 9.0913446911145e-05,
|
| 12855 |
+
"learning_rate": 0.00017150701599675058,
|
| 12856 |
+
"loss": 0.0,
|
| 12857 |
+
"step": 1824
|
| 12858 |
+
},
|
| 12859 |
+
{
|
| 12860 |
+
"epoch": 0.748641165008717,
|
| 12861 |
+
"grad_norm": 0.00021381184342317283,
|
| 12862 |
+
"learning_rate": 0.00017147684674276263,
|
| 12863 |
+
"loss": 0.0,
|
| 12864 |
+
"step": 1825
|
| 12865 |
+
},
|
| 12866 |
+
{
|
| 12867 |
+
"epoch": 0.7490513793457081,
|
| 12868 |
+
"grad_norm": 0.0001662960712565109,
|
| 12869 |
+
"learning_rate": 0.00017144666418168568,
|
| 12870 |
+
"loss": 0.0,
|
| 12871 |
+
"step": 1826
|
| 12872 |
+
},
|
| 12873 |
+
{
|
| 12874 |
+
"epoch": 0.7494615936826992,
|
| 12875 |
+
"grad_norm": 0.0002207818761235103,
|
| 12876 |
+
"learning_rate": 0.00017141646831913892,
|
| 12877 |
+
"loss": 0.0,
|
| 12878 |
+
"step": 1827
|
| 12879 |
+
},
|
| 12880 |
+
{
|
| 12881 |
+
"epoch": 0.7498718080196903,
|
| 12882 |
+
"grad_norm": 0.0006290765013545752,
|
| 12883 |
+
"learning_rate": 0.000171386259160744,
|
| 12884 |
+
"loss": 0.0,
|
| 12885 |
+
"step": 1828
|
| 12886 |
+
},
|
| 12887 |
+
{
|
| 12888 |
+
"epoch": 0.7502820223566814,
|
| 12889 |
+
"grad_norm": 0.00022595013433601707,
|
| 12890 |
+
"learning_rate": 0.0001713560367121251,
|
| 12891 |
+
"loss": 0.0,
|
| 12892 |
+
"step": 1829
|
| 12893 |
+
},
|
| 12894 |
+
{
|
| 12895 |
+
"epoch": 0.7506922366936725,
|
| 12896 |
+
"grad_norm": 8.614773832960054e-05,
|
| 12897 |
+
"learning_rate": 0.0001713258009789088,
|
| 12898 |
+
"loss": 0.0,
|
| 12899 |
+
"step": 1830
|
| 12900 |
+
},
|
| 12901 |
+
{
|
| 12902 |
+
"epoch": 0.7511024510306635,
|
| 12903 |
+
"grad_norm": 6.568868411704898e-05,
|
| 12904 |
+
"learning_rate": 0.00017129555196672422,
|
| 12905 |
+
"loss": 0.0,
|
| 12906 |
+
"step": 1831
|
| 12907 |
+
},
|
| 12908 |
+
{
|
| 12909 |
+
"epoch": 0.7515126653676546,
|
| 12910 |
+
"grad_norm": 0.000257197767496109,
|
| 12911 |
+
"learning_rate": 0.0001712652896812029,
|
| 12912 |
+
"loss": 0.0,
|
| 12913 |
+
"step": 1832
|
| 12914 |
+
},
|
| 12915 |
+
{
|
| 12916 |
+
"epoch": 0.7519228797046457,
|
| 12917 |
+
"grad_norm": 0.0004804676282219589,
|
| 12918 |
+
"learning_rate": 0.00017123501412797887,
|
| 12919 |
+
"loss": 0.0,
|
| 12920 |
+
"step": 1833
|
| 12921 |
+
},
|
| 12922 |
+
{
|
| 12923 |
+
"epoch": 0.7523330940416367,
|
| 12924 |
+
"grad_norm": 0.00018361452384851873,
|
| 12925 |
+
"learning_rate": 0.00017120472531268865,
|
| 12926 |
+
"loss": 0.0,
|
| 12927 |
+
"step": 1834
|
| 12928 |
+
},
|
| 12929 |
+
{
|
| 12930 |
+
"epoch": 0.7527433083786278,
|
| 12931 |
+
"grad_norm": 0.0004529896250460297,
|
| 12932 |
+
"learning_rate": 0.00017117442324097123,
|
| 12933 |
+
"loss": 0.0,
|
| 12934 |
+
"step": 1835
|
| 12935 |
+
},
|
| 12936 |
+
{
|
| 12937 |
+
"epoch": 0.7531535227156189,
|
| 12938 |
+
"grad_norm": 0.00019788274948950857,
|
| 12939 |
+
"learning_rate": 0.000171144107918468,
|
| 12940 |
+
"loss": 0.0,
|
| 12941 |
+
"step": 1836
|
| 12942 |
+
},
|
| 12943 |
+
{
|
| 12944 |
+
"epoch": 0.75356373705261,
|
| 12945 |
+
"grad_norm": 0.0002504466974642128,
|
| 12946 |
+
"learning_rate": 0.0001711137793508229,
|
| 12947 |
+
"loss": 0.0,
|
| 12948 |
+
"step": 1837
|
| 12949 |
+
},
|
| 12950 |
+
{
|
| 12951 |
+
"epoch": 0.7539739513896011,
|
| 12952 |
+
"grad_norm": 8.635919948574156e-05,
|
| 12953 |
+
"learning_rate": 0.0001710834375436823,
|
| 12954 |
+
"loss": 0.0,
|
| 12955 |
+
"step": 1838
|
| 12956 |
+
},
|
| 12957 |
+
{
|
| 12958 |
+
"epoch": 0.7543841657265922,
|
| 12959 |
+
"grad_norm": 4.458254625205882e-05,
|
| 12960 |
+
"learning_rate": 0.00017105308250269503,
|
| 12961 |
+
"loss": 0.0,
|
| 12962 |
+
"step": 1839
|
| 12963 |
+
},
|
| 12964 |
+
{
|
| 12965 |
+
"epoch": 0.7547943800635832,
|
| 12966 |
+
"grad_norm": 0.00015645618259441108,
|
| 12967 |
+
"learning_rate": 0.0001710227142335124,
|
| 12968 |
+
"loss": 0.0,
|
| 12969 |
+
"step": 1840
|
| 12970 |
+
},
|
| 12971 |
+
{
|
| 12972 |
+
"epoch": 0.7552045944005743,
|
| 12973 |
+
"grad_norm": 5.3510993893723935e-05,
|
| 12974 |
+
"learning_rate": 0.00017099233274178816,
|
| 12975 |
+
"loss": 0.0,
|
| 12976 |
+
"step": 1841
|
| 12977 |
+
},
|
| 12978 |
+
{
|
| 12979 |
+
"epoch": 0.7556148087375654,
|
| 12980 |
+
"grad_norm": 0.00012250395957380533,
|
| 12981 |
+
"learning_rate": 0.00017096193803317855,
|
| 12982 |
+
"loss": 0.0,
|
| 12983 |
+
"step": 1842
|
| 12984 |
+
},
|
| 12985 |
+
{
|
| 12986 |
+
"epoch": 0.7560250230745564,
|
| 12987 |
+
"grad_norm": 5.202455577091314e-05,
|
| 12988 |
+
"learning_rate": 0.00017093153011334225,
|
| 12989 |
+
"loss": 0.0,
|
| 12990 |
+
"step": 1843
|
| 12991 |
+
},
|
| 12992 |
+
{
|
| 12993 |
+
"epoch": 0.7564352374115475,
|
| 12994 |
+
"grad_norm": 7.595092756673694e-05,
|
| 12995 |
+
"learning_rate": 0.00017090110898794044,
|
| 12996 |
+
"loss": 0.0,
|
| 12997 |
+
"step": 1844
|
| 12998 |
+
},
|
| 12999 |
+
{
|
| 13000 |
+
"epoch": 0.7568454517485386,
|
| 13001 |
+
"grad_norm": 8.676994912093505e-05,
|
| 13002 |
+
"learning_rate": 0.00017087067466263665,
|
| 13003 |
+
"loss": 0.0,
|
| 13004 |
+
"step": 1845
|
| 13005 |
+
},
|
| 13006 |
+
{
|
| 13007 |
+
"epoch": 0.7572556660855297,
|
| 13008 |
+
"grad_norm": 0.06363023072481155,
|
| 13009 |
+
"learning_rate": 0.000170840227143097,
|
| 13010 |
+
"loss": 0.0005,
|
| 13011 |
+
"step": 1846
|
| 13012 |
+
},
|
| 13013 |
+
{
|
| 13014 |
+
"epoch": 0.7576658804225208,
|
| 13015 |
+
"grad_norm": 0.00010037697938969359,
|
| 13016 |
+
"learning_rate": 0.00017080976643499006,
|
| 13017 |
+
"loss": 0.0,
|
| 13018 |
+
"step": 1847
|
| 13019 |
+
},
|
| 13020 |
+
{
|
| 13021 |
+
"epoch": 0.7580760947595119,
|
| 13022 |
+
"grad_norm": 4.500959403230809e-05,
|
| 13023 |
+
"learning_rate": 0.00017077929254398665,
|
| 13024 |
+
"loss": 0.0,
|
| 13025 |
+
"step": 1848
|
| 13026 |
+
},
|
| 13027 |
+
{
|
| 13028 |
+
"epoch": 0.7584863090965029,
|
| 13029 |
+
"grad_norm": 0.0006774234352633357,
|
| 13030 |
+
"learning_rate": 0.00017074880547576036,
|
| 13031 |
+
"loss": 0.0,
|
| 13032 |
+
"step": 1849
|
| 13033 |
+
},
|
| 13034 |
+
{
|
| 13035 |
+
"epoch": 0.758896523433494,
|
| 13036 |
+
"grad_norm": 0.0006148330285213888,
|
| 13037 |
+
"learning_rate": 0.00017071830523598695,
|
| 13038 |
+
"loss": 0.0,
|
| 13039 |
+
"step": 1850
|
| 13040 |
+
},
|
| 13041 |
+
{
|
| 13042 |
+
"epoch": 0.7593067377704851,
|
| 13043 |
+
"grad_norm": 8.246702054748312e-05,
|
| 13044 |
+
"learning_rate": 0.00017068779183034485,
|
| 13045 |
+
"loss": 0.0,
|
| 13046 |
+
"step": 1851
|
| 13047 |
+
},
|
| 13048 |
+
{
|
| 13049 |
+
"epoch": 0.7597169521074761,
|
| 13050 |
+
"grad_norm": 0.00030712856096215546,
|
| 13051 |
+
"learning_rate": 0.00017065726526451478,
|
| 13052 |
+
"loss": 0.0,
|
| 13053 |
+
"step": 1852
|
| 13054 |
+
},
|
| 13055 |
+
{
|
| 13056 |
+
"epoch": 0.7601271664444672,
|
| 13057 |
+
"grad_norm": 0.00012686121044680476,
|
| 13058 |
+
"learning_rate": 0.00017062672554417997,
|
| 13059 |
+
"loss": 0.0,
|
| 13060 |
+
"step": 1853
|
| 13061 |
+
},
|
| 13062 |
+
{
|
| 13063 |
+
"epoch": 0.7605373807814583,
|
| 13064 |
+
"grad_norm": 4.873663783655502e-05,
|
| 13065 |
+
"learning_rate": 0.00017059617267502618,
|
| 13066 |
+
"loss": 0.0,
|
| 13067 |
+
"step": 1854
|
| 13068 |
+
},
|
| 13069 |
+
{
|
| 13070 |
+
"epoch": 0.7609475951184493,
|
| 13071 |
+
"grad_norm": 0.0012454637326300144,
|
| 13072 |
+
"learning_rate": 0.00017056560666274146,
|
| 13073 |
+
"loss": 0.0001,
|
| 13074 |
+
"step": 1855
|
| 13075 |
+
},
|
| 13076 |
+
{
|
| 13077 |
+
"epoch": 0.7613578094554405,
|
| 13078 |
+
"grad_norm": 0.0001640759437577799,
|
| 13079 |
+
"learning_rate": 0.00017053502751301642,
|
| 13080 |
+
"loss": 0.0,
|
| 13081 |
+
"step": 1856
|
| 13082 |
+
},
|
| 13083 |
+
{
|
| 13084 |
+
"epoch": 0.7617680237924316,
|
| 13085 |
+
"grad_norm": 0.0007171401521191001,
|
| 13086 |
+
"learning_rate": 0.0001705044352315441,
|
| 13087 |
+
"loss": 0.0,
|
| 13088 |
+
"step": 1857
|
| 13089 |
+
},
|
| 13090 |
+
{
|
| 13091 |
+
"epoch": 0.7621782381294226,
|
| 13092 |
+
"grad_norm": 3.358495814609341e-05,
|
| 13093 |
+
"learning_rate": 0.00017047382982401994,
|
| 13094 |
+
"loss": 0.0,
|
| 13095 |
+
"step": 1858
|
| 13096 |
+
},
|
| 13097 |
+
{
|
| 13098 |
+
"epoch": 0.7625884524664137,
|
| 13099 |
+
"grad_norm": 0.00012298431829549372,
|
| 13100 |
+
"learning_rate": 0.00017044321129614187,
|
| 13101 |
+
"loss": 0.0,
|
| 13102 |
+
"step": 1859
|
| 13103 |
+
},
|
| 13104 |
+
{
|
| 13105 |
+
"epoch": 0.7629986668034048,
|
| 13106 |
+
"grad_norm": 3.476136771496385e-05,
|
| 13107 |
+
"learning_rate": 0.00017041257965361026,
|
| 13108 |
+
"loss": 0.0,
|
| 13109 |
+
"step": 1860
|
| 13110 |
+
},
|
| 13111 |
+
{
|
| 13112 |
+
"epoch": 0.7634088811403958,
|
| 13113 |
+
"grad_norm": 0.00014345084491651505,
|
| 13114 |
+
"learning_rate": 0.00017038193490212788,
|
| 13115 |
+
"loss": 0.0,
|
| 13116 |
+
"step": 1861
|
| 13117 |
+
},
|
| 13118 |
+
{
|
| 13119 |
+
"epoch": 0.7638190954773869,
|
| 13120 |
+
"grad_norm": 0.0006720746750943363,
|
| 13121 |
+
"learning_rate": 0.0001703512770474,
|
| 13122 |
+
"loss": 0.0,
|
| 13123 |
+
"step": 1862
|
| 13124 |
+
},
|
| 13125 |
+
{
|
| 13126 |
+
"epoch": 0.764229309814378,
|
| 13127 |
+
"grad_norm": 6.40077778371051e-05,
|
| 13128 |
+
"learning_rate": 0.00017032060609513424,
|
| 13129 |
+
"loss": 0.0,
|
| 13130 |
+
"step": 1863
|
| 13131 |
+
},
|
| 13132 |
+
{
|
| 13133 |
+
"epoch": 0.764639524151369,
|
| 13134 |
+
"grad_norm": 0.00015238374180626124,
|
| 13135 |
+
"learning_rate": 0.00017028992205104078,
|
| 13136 |
+
"loss": 0.0,
|
| 13137 |
+
"step": 1864
|
| 13138 |
+
},
|
| 13139 |
+
{
|
| 13140 |
+
"epoch": 0.7650497384883602,
|
| 13141 |
+
"grad_norm": 0.00023494350898545235,
|
| 13142 |
+
"learning_rate": 0.00017025922492083215,
|
| 13143 |
+
"loss": 0.0,
|
| 13144 |
+
"step": 1865
|
| 13145 |
+
},
|
| 13146 |
+
{
|
| 13147 |
+
"epoch": 0.7654599528253513,
|
| 13148 |
+
"grad_norm": 6.289301381912082e-05,
|
| 13149 |
+
"learning_rate": 0.00017022851471022333,
|
| 13150 |
+
"loss": 0.0,
|
| 13151 |
+
"step": 1866
|
| 13152 |
+
},
|
| 13153 |
+
{
|
| 13154 |
+
"epoch": 0.7658701671623424,
|
| 13155 |
+
"grad_norm": 0.00010566967102931812,
|
| 13156 |
+
"learning_rate": 0.00017019779142493174,
|
| 13157 |
+
"loss": 0.0,
|
| 13158 |
+
"step": 1867
|
| 13159 |
+
},
|
| 13160 |
+
{
|
| 13161 |
+
"epoch": 0.7662803814993334,
|
| 13162 |
+
"grad_norm": 0.0003754729696083814,
|
| 13163 |
+
"learning_rate": 0.00017016705507067728,
|
| 13164 |
+
"loss": 0.0,
|
| 13165 |
+
"step": 1868
|
| 13166 |
+
},
|
| 13167 |
+
{
|
| 13168 |
+
"epoch": 0.7666905958363245,
|
| 13169 |
+
"grad_norm": 5.574085662374273e-05,
|
| 13170 |
+
"learning_rate": 0.0001701363056531822,
|
| 13171 |
+
"loss": 0.0,
|
| 13172 |
+
"step": 1869
|
| 13173 |
+
},
|
| 13174 |
+
{
|
| 13175 |
+
"epoch": 0.7671008101733156,
|
| 13176 |
+
"grad_norm": 0.0006382344290614128,
|
| 13177 |
+
"learning_rate": 0.00017010554317817124,
|
| 13178 |
+
"loss": 0.0,
|
| 13179 |
+
"step": 1870
|
| 13180 |
+
},
|
| 13181 |
+
{
|
| 13182 |
+
"epoch": 0.7675110245103066,
|
| 13183 |
+
"grad_norm": 0.0001239187695318833,
|
| 13184 |
+
"learning_rate": 0.00017007476765137154,
|
| 13185 |
+
"loss": 0.0,
|
| 13186 |
+
"step": 1871
|
| 13187 |
+
},
|
| 13188 |
+
{
|
| 13189 |
+
"epoch": 0.7679212388472977,
|
| 13190 |
+
"grad_norm": 0.00011495229409774765,
|
| 13191 |
+
"learning_rate": 0.0001700439790785127,
|
| 13192 |
+
"loss": 0.0,
|
| 13193 |
+
"step": 1872
|
| 13194 |
+
},
|
| 13195 |
+
{
|
| 13196 |
+
"epoch": 0.7683314531842887,
|
| 13197 |
+
"grad_norm": 0.0063695781864225864,
|
| 13198 |
+
"learning_rate": 0.00017001317746532678,
|
| 13199 |
+
"loss": 0.0001,
|
| 13200 |
+
"step": 1873
|
| 13201 |
+
},
|
| 13202 |
+
{
|
| 13203 |
+
"epoch": 0.7687416675212798,
|
| 13204 |
+
"grad_norm": 8.43389134388417e-05,
|
| 13205 |
+
"learning_rate": 0.0001699823628175481,
|
| 13206 |
+
"loss": 0.0,
|
| 13207 |
+
"step": 1874
|
| 13208 |
+
},
|
| 13209 |
+
{
|
| 13210 |
+
"epoch": 0.769151881858271,
|
| 13211 |
+
"grad_norm": 0.00022165938571561128,
|
| 13212 |
+
"learning_rate": 0.00016995153514091365,
|
| 13213 |
+
"loss": 0.0,
|
| 13214 |
+
"step": 1875
|
| 13215 |
+
},
|
| 13216 |
+
{
|
| 13217 |
+
"epoch": 0.7695620961952621,
|
| 13218 |
+
"grad_norm": 3.49842848663684e-05,
|
| 13219 |
+
"learning_rate": 0.00016992069444116264,
|
| 13220 |
+
"loss": 0.0,
|
| 13221 |
+
"step": 1876
|
| 13222 |
+
},
|
| 13223 |
+
{
|
| 13224 |
+
"epoch": 0.7699723105322531,
|
| 13225 |
+
"grad_norm": 0.00031850888626649976,
|
| 13226 |
+
"learning_rate": 0.00016988984072403688,
|
| 13227 |
+
"loss": 0.0,
|
| 13228 |
+
"step": 1877
|
| 13229 |
+
},
|
| 13230 |
+
{
|
| 13231 |
+
"epoch": 0.7703825248692442,
|
| 13232 |
+
"grad_norm": 0.0005794697208330035,
|
| 13233 |
+
"learning_rate": 0.00016985897399528042,
|
| 13234 |
+
"loss": 0.0,
|
| 13235 |
+
"step": 1878
|
| 13236 |
+
},
|
| 13237 |
+
{
|
| 13238 |
+
"epoch": 0.7707927392062353,
|
| 13239 |
+
"grad_norm": 7.690059283049777e-05,
|
| 13240 |
+
"learning_rate": 0.00016982809426063986,
|
| 13241 |
+
"loss": 0.0,
|
| 13242 |
+
"step": 1879
|
| 13243 |
+
},
|
| 13244 |
+
{
|
| 13245 |
+
"epoch": 0.7712029535432263,
|
| 13246 |
+
"grad_norm": 0.00010885802475968376,
|
| 13247 |
+
"learning_rate": 0.0001697972015258642,
|
| 13248 |
+
"loss": 0.0,
|
| 13249 |
+
"step": 1880
|
| 13250 |
+
},
|
| 13251 |
+
{
|
| 13252 |
+
"epoch": 0.7716131678802174,
|
| 13253 |
+
"grad_norm": 6.041960295988247e-05,
|
| 13254 |
+
"learning_rate": 0.00016976629579670483,
|
| 13255 |
+
"loss": 0.0,
|
| 13256 |
+
"step": 1881
|
| 13257 |
+
},
|
| 13258 |
+
{
|
| 13259 |
+
"epoch": 0.7720233822172085,
|
| 13260 |
+
"grad_norm": 0.00021703245874959975,
|
| 13261 |
+
"learning_rate": 0.00016973537707891558,
|
| 13262 |
+
"loss": 0.0,
|
| 13263 |
+
"step": 1882
|
| 13264 |
+
},
|
| 13265 |
+
{
|
| 13266 |
+
"epoch": 0.7724335965541995,
|
| 13267 |
+
"grad_norm": 6.754266360076144e-05,
|
| 13268 |
+
"learning_rate": 0.00016970444537825268,
|
| 13269 |
+
"loss": 0.0,
|
| 13270 |
+
"step": 1883
|
| 13271 |
+
},
|
| 13272 |
+
{
|
| 13273 |
+
"epoch": 0.7728438108911907,
|
| 13274 |
+
"grad_norm": 0.000241806628764607,
|
| 13275 |
+
"learning_rate": 0.00016967350070047477,
|
| 13276 |
+
"loss": 0.0,
|
| 13277 |
+
"step": 1884
|
| 13278 |
+
},
|
| 13279 |
+
{
|
| 13280 |
+
"epoch": 0.7732540252281818,
|
| 13281 |
+
"grad_norm": 5.24107163073495e-05,
|
| 13282 |
+
"learning_rate": 0.00016964254305134303,
|
| 13283 |
+
"loss": 0.0,
|
| 13284 |
+
"step": 1885
|
| 13285 |
+
},
|
| 13286 |
+
{
|
| 13287 |
+
"epoch": 0.7736642395651728,
|
| 13288 |
+
"grad_norm": 7.230758637888357e-05,
|
| 13289 |
+
"learning_rate": 0.00016961157243662085,
|
| 13290 |
+
"loss": 0.0,
|
| 13291 |
+
"step": 1886
|
| 13292 |
+
},
|
| 13293 |
+
{
|
| 13294 |
+
"epoch": 0.7740744539021639,
|
| 13295 |
+
"grad_norm": 8.965937013272196e-05,
|
| 13296 |
+
"learning_rate": 0.00016958058886207416,
|
| 13297 |
+
"loss": 0.0,
|
| 13298 |
+
"step": 1887
|
| 13299 |
+
},
|
| 13300 |
+
{
|
| 13301 |
+
"epoch": 0.774484668239155,
|
| 13302 |
+
"grad_norm": 0.3443005084991455,
|
| 13303 |
+
"learning_rate": 0.0001695495923334713,
|
| 13304 |
+
"loss": 0.0051,
|
| 13305 |
+
"step": 1888
|
| 13306 |
+
},
|
| 13307 |
+
{
|
| 13308 |
+
"epoch": 0.774894882576146,
|
| 13309 |
+
"grad_norm": 0.00016324226453434676,
|
| 13310 |
+
"learning_rate": 0.00016951858285658294,
|
| 13311 |
+
"loss": 0.0,
|
| 13312 |
+
"step": 1889
|
| 13313 |
+
},
|
| 13314 |
+
{
|
| 13315 |
+
"epoch": 0.7753050969131371,
|
| 13316 |
+
"grad_norm": 0.0003269320004619658,
|
| 13317 |
+
"learning_rate": 0.0001694875604371823,
|
| 13318 |
+
"loss": 0.0,
|
| 13319 |
+
"step": 1890
|
| 13320 |
+
},
|
| 13321 |
+
{
|
| 13322 |
+
"epoch": 0.7757153112501282,
|
| 13323 |
+
"grad_norm": 0.00020214481628499925,
|
| 13324 |
+
"learning_rate": 0.00016945652508104487,
|
| 13325 |
+
"loss": 0.0,
|
| 13326 |
+
"step": 1891
|
| 13327 |
+
},
|
| 13328 |
+
{
|
| 13329 |
+
"epoch": 0.7761255255871192,
|
| 13330 |
+
"grad_norm": 0.010041148401796818,
|
| 13331 |
+
"learning_rate": 0.00016942547679394865,
|
| 13332 |
+
"loss": 0.0001,
|
| 13333 |
+
"step": 1892
|
| 13334 |
+
},
|
| 13335 |
+
{
|
| 13336 |
+
"epoch": 0.7765357399241104,
|
| 13337 |
+
"grad_norm": 0.0016112264711409807,
|
| 13338 |
+
"learning_rate": 0.000169394415581674,
|
| 13339 |
+
"loss": 0.0001,
|
| 13340 |
+
"step": 1893
|
| 13341 |
+
},
|
| 13342 |
+
{
|
| 13343 |
+
"epoch": 0.7769459542611015,
|
| 13344 |
+
"grad_norm": 0.07540201395750046,
|
| 13345 |
+
"learning_rate": 0.00016936334145000367,
|
| 13346 |
+
"loss": 0.0006,
|
| 13347 |
+
"step": 1894
|
| 13348 |
+
},
|
| 13349 |
+
{
|
| 13350 |
+
"epoch": 0.7773561685980925,
|
| 13351 |
+
"grad_norm": 0.23226173222064972,
|
| 13352 |
+
"learning_rate": 0.00016933225440472286,
|
| 13353 |
+
"loss": 0.0111,
|
| 13354 |
+
"step": 1895
|
| 13355 |
+
},
|
| 13356 |
+
{
|
| 13357 |
+
"epoch": 0.7777663829350836,
|
| 13358 |
+
"grad_norm": 0.0060230218805372715,
|
| 13359 |
+
"learning_rate": 0.00016930115445161912,
|
| 13360 |
+
"loss": 0.0002,
|
| 13361 |
+
"step": 1896
|
| 13362 |
+
},
|
| 13363 |
+
{
|
| 13364 |
+
"epoch": 0.7781765972720747,
|
| 13365 |
+
"grad_norm": 0.027786266058683395,
|
| 13366 |
+
"learning_rate": 0.00016927004159648243,
|
| 13367 |
+
"loss": 0.0007,
|
| 13368 |
+
"step": 1897
|
| 13369 |
+
},
|
| 13370 |
+
{
|
| 13371 |
+
"epoch": 0.7785868116090657,
|
| 13372 |
+
"grad_norm": 0.06548523157835007,
|
| 13373 |
+
"learning_rate": 0.00016923891584510523,
|
| 13374 |
+
"loss": 0.0005,
|
| 13375 |
+
"step": 1898
|
| 13376 |
+
},
|
| 13377 |
+
{
|
| 13378 |
+
"epoch": 0.7789970259460568,
|
| 13379 |
+
"grad_norm": 0.03102070838212967,
|
| 13380 |
+
"learning_rate": 0.0001692077772032823,
|
| 13381 |
+
"loss": 0.0007,
|
| 13382 |
+
"step": 1899
|
| 13383 |
+
},
|
| 13384 |
+
{
|
| 13385 |
+
"epoch": 0.7794072402830479,
|
| 13386 |
+
"grad_norm": 0.0020308480598032475,
|
| 13387 |
+
"learning_rate": 0.00016917662567681076,
|
| 13388 |
+
"loss": 0.0001,
|
| 13389 |
+
"step": 1900
|
| 13390 |
+
},
|
| 13391 |
+
{
|
| 13392 |
+
"epoch": 0.7798174546200389,
|
| 13393 |
+
"grad_norm": 3.757835626602173,
|
| 13394 |
+
"learning_rate": 0.00016914546127149026,
|
| 13395 |
+
"loss": 0.0333,
|
| 13396 |
+
"step": 1901
|
| 13397 |
+
},
|
| 13398 |
+
{
|
| 13399 |
+
"epoch": 0.78022766895703,
|
| 13400 |
+
"grad_norm": 0.011899041943252087,
|
| 13401 |
+
"learning_rate": 0.00016911428399312277,
|
| 13402 |
+
"loss": 0.0004,
|
| 13403 |
+
"step": 1902
|
| 13404 |
+
},
|
| 13405 |
+
{
|
| 13406 |
+
"epoch": 0.7806378832940212,
|
| 13407 |
+
"grad_norm": 0.20026826858520508,
|
| 13408 |
+
"learning_rate": 0.00016908309384751265,
|
| 13409 |
+
"loss": 0.0025,
|
| 13410 |
+
"step": 1903
|
| 13411 |
+
},
|
| 13412 |
+
{
|
| 13413 |
+
"epoch": 0.7810480976310122,
|
| 13414 |
+
"grad_norm": 0.0022054959554225206,
|
| 13415 |
+
"learning_rate": 0.00016905189084046672,
|
| 13416 |
+
"loss": 0.0001,
|
| 13417 |
+
"step": 1904
|
| 13418 |
+
},
|
| 13419 |
+
{
|
| 13420 |
+
"epoch": 0.7814583119680033,
|
| 13421 |
+
"grad_norm": 0.00014092907076701522,
|
| 13422 |
+
"learning_rate": 0.00016902067497779407,
|
| 13423 |
+
"loss": 0.0,
|
| 13424 |
+
"step": 1905
|
| 13425 |
+
},
|
| 13426 |
+
{
|
| 13427 |
+
"epoch": 0.7818685263049944,
|
| 13428 |
+
"grad_norm": 0.001451042597182095,
|
| 13429 |
+
"learning_rate": 0.00016898944626530637,
|
| 13430 |
+
"loss": 0.0,
|
| 13431 |
+
"step": 1906
|
| 13432 |
+
},
|
| 13433 |
+
{
|
| 13434 |
+
"epoch": 0.7822787406419854,
|
| 13435 |
+
"grad_norm": 0.0005872730980627239,
|
| 13436 |
+
"learning_rate": 0.0001689582047088175,
|
| 13437 |
+
"loss": 0.0,
|
| 13438 |
+
"step": 1907
|
| 13439 |
+
},
|
| 13440 |
+
{
|
| 13441 |
+
"epoch": 0.7826889549789765,
|
| 13442 |
+
"grad_norm": 0.0044991071335971355,
|
| 13443 |
+
"learning_rate": 0.00016892695031414383,
|
| 13444 |
+
"loss": 0.0002,
|
| 13445 |
+
"step": 1908
|
| 13446 |
+
},
|
| 13447 |
+
{
|
| 13448 |
+
"epoch": 0.7830991693159676,
|
| 13449 |
+
"grad_norm": 0.005360184237360954,
|
| 13450 |
+
"learning_rate": 0.0001688956830871041,
|
| 13451 |
+
"loss": 0.0001,
|
| 13452 |
+
"step": 1909
|
| 13453 |
+
},
|
| 13454 |
+
{
|
| 13455 |
+
"epoch": 0.7835093836529586,
|
| 13456 |
+
"grad_norm": 0.21680708229541779,
|
| 13457 |
+
"learning_rate": 0.00016886440303351942,
|
| 13458 |
+
"loss": 0.0094,
|
| 13459 |
+
"step": 1910
|
| 13460 |
+
},
|
| 13461 |
+
{
|
| 13462 |
+
"epoch": 0.7839195979899497,
|
| 13463 |
+
"grad_norm": 0.0017402676166966558,
|
| 13464 |
+
"learning_rate": 0.00016883311015921334,
|
| 13465 |
+
"loss": 0.0001,
|
| 13466 |
+
"step": 1911
|
| 13467 |
+
},
|
| 13468 |
+
{
|
| 13469 |
+
"epoch": 0.7843298123269409,
|
| 13470 |
+
"grad_norm": 1.0862468481063843,
|
| 13471 |
+
"learning_rate": 0.00016880180447001174,
|
| 13472 |
+
"loss": 0.0177,
|
| 13473 |
+
"step": 1912
|
| 13474 |
+
},
|
| 13475 |
+
{
|
| 13476 |
+
"epoch": 0.7847400266639319,
|
| 13477 |
+
"grad_norm": 0.0008529541082680225,
|
| 13478 |
+
"learning_rate": 0.0001687704859717429,
|
| 13479 |
+
"loss": 0.0001,
|
| 13480 |
+
"step": 1913
|
| 13481 |
+
},
|
| 13482 |
+
{
|
| 13483 |
+
"epoch": 0.785150241000923,
|
| 13484 |
+
"grad_norm": 0.13935452699661255,
|
| 13485 |
+
"learning_rate": 0.0001687391546702375,
|
| 13486 |
+
"loss": 0.008,
|
| 13487 |
+
"step": 1914
|
| 13488 |
+
},
|
| 13489 |
+
{
|
| 13490 |
+
"epoch": 0.7855604553379141,
|
| 13491 |
+
"grad_norm": 0.00019784425967372954,
|
| 13492 |
+
"learning_rate": 0.00016870781057132857,
|
| 13493 |
+
"loss": 0.0,
|
| 13494 |
+
"step": 1915
|
| 13495 |
+
},
|
| 13496 |
+
{
|
| 13497 |
+
"epoch": 0.7859706696749051,
|
| 13498 |
+
"grad_norm": 0.17159759998321533,
|
| 13499 |
+
"learning_rate": 0.0001686764536808516,
|
| 13500 |
+
"loss": 0.0019,
|
| 13501 |
+
"step": 1916
|
| 13502 |
+
},
|
| 13503 |
+
{
|
| 13504 |
+
"epoch": 0.7863808840118962,
|
| 13505 |
+
"grad_norm": 0.0010946192778646946,
|
| 13506 |
+
"learning_rate": 0.0001686450840046444,
|
| 13507 |
+
"loss": 0.0001,
|
| 13508 |
+
"step": 1917
|
| 13509 |
+
},
|
| 13510 |
+
{
|
| 13511 |
+
"epoch": 0.7867910983488873,
|
| 13512 |
+
"grad_norm": 0.47995659708976746,
|
| 13513 |
+
"learning_rate": 0.00016861370154854709,
|
| 13514 |
+
"loss": 0.0023,
|
| 13515 |
+
"step": 1918
|
| 13516 |
+
},
|
| 13517 |
+
{
|
| 13518 |
+
"epoch": 0.7872013126858783,
|
| 13519 |
+
"grad_norm": 0.03290826454758644,
|
| 13520 |
+
"learning_rate": 0.00016858230631840236,
|
| 13521 |
+
"loss": 0.0012,
|
| 13522 |
+
"step": 1919
|
| 13523 |
+
},
|
| 13524 |
+
{
|
| 13525 |
+
"epoch": 0.7876115270228694,
|
| 13526 |
+
"grad_norm": 0.007945411838591099,
|
| 13527 |
+
"learning_rate": 0.00016855089832005512,
|
| 13528 |
+
"loss": 0.0003,
|
| 13529 |
+
"step": 1920
|
| 13530 |
+
},
|
| 13531 |
+
{
|
| 13532 |
+
"epoch": 0.7880217413598605,
|
| 13533 |
+
"grad_norm": 0.0042945523746311665,
|
| 13534 |
+
"learning_rate": 0.0001685194775593527,
|
| 13535 |
+
"loss": 0.0002,
|
| 13536 |
+
"step": 1921
|
| 13537 |
+
},
|
| 13538 |
+
{
|
| 13539 |
+
"epoch": 0.7884319556968516,
|
| 13540 |
+
"grad_norm": 0.029409119859337807,
|
| 13541 |
+
"learning_rate": 0.0001684880440421448,
|
| 13542 |
+
"loss": 0.001,
|
| 13543 |
+
"step": 1922
|
| 13544 |
+
},
|
| 13545 |
+
{
|
| 13546 |
+
"epoch": 0.7888421700338427,
|
| 13547 |
+
"grad_norm": 0.20772117376327515,
|
| 13548 |
+
"learning_rate": 0.0001684565977742835,
|
| 13549 |
+
"loss": 0.0044,
|
| 13550 |
+
"step": 1923
|
| 13551 |
+
},
|
| 13552 |
+
{
|
| 13553 |
+
"epoch": 0.7892523843708338,
|
| 13554 |
+
"grad_norm": 0.004390776623040438,
|
| 13555 |
+
"learning_rate": 0.0001684251387616233,
|
| 13556 |
+
"loss": 0.0002,
|
| 13557 |
+
"step": 1924
|
| 13558 |
+
},
|
| 13559 |
+
{
|
| 13560 |
+
"epoch": 0.7896625987078248,
|
| 13561 |
+
"grad_norm": 0.003347913036122918,
|
| 13562 |
+
"learning_rate": 0.00016839366701002103,
|
| 13563 |
+
"loss": 0.0001,
|
| 13564 |
+
"step": 1925
|
| 13565 |
+
},
|
| 13566 |
+
{
|
| 13567 |
+
"epoch": 0.7900728130448159,
|
| 13568 |
+
"grad_norm": 0.0004986366839148104,
|
| 13569 |
+
"learning_rate": 0.00016836218252533585,
|
| 13570 |
+
"loss": 0.0,
|
| 13571 |
+
"step": 1926
|
| 13572 |
+
},
|
| 13573 |
+
{
|
| 13574 |
+
"epoch": 0.790483027381807,
|
| 13575 |
+
"grad_norm": 0.0017950295004993677,
|
| 13576 |
+
"learning_rate": 0.00016833068531342936,
|
| 13577 |
+
"loss": 0.0001,
|
| 13578 |
+
"step": 1927
|
| 13579 |
+
},
|
| 13580 |
+
{
|
| 13581 |
+
"epoch": 0.790893241718798,
|
| 13582 |
+
"grad_norm": 0.0007749379728920758,
|
| 13583 |
+
"learning_rate": 0.0001682991753801655,
|
| 13584 |
+
"loss": 0.0,
|
| 13585 |
+
"step": 1928
|
| 13586 |
+
},
|
| 13587 |
+
{
|
| 13588 |
+
"epoch": 0.7913034560557891,
|
| 13589 |
+
"grad_norm": 0.004895780235528946,
|
| 13590 |
+
"learning_rate": 0.0001682676527314106,
|
| 13591 |
+
"loss": 0.0001,
|
| 13592 |
+
"step": 1929
|
| 13593 |
+
},
|
| 13594 |
+
{
|
| 13595 |
+
"epoch": 0.7917136703927802,
|
| 13596 |
+
"grad_norm": 0.0009641271317377687,
|
| 13597 |
+
"learning_rate": 0.0001682361173730333,
|
| 13598 |
+
"loss": 0.0,
|
| 13599 |
+
"step": 1930
|
| 13600 |
+
},
|
| 13601 |
+
{
|
| 13602 |
+
"epoch": 0.7921238847297714,
|
| 13603 |
+
"grad_norm": 0.00035252628731541336,
|
| 13604 |
+
"learning_rate": 0.00016820456931090468,
|
| 13605 |
+
"loss": 0.0,
|
| 13606 |
+
"step": 1931
|
| 13607 |
+
},
|
| 13608 |
+
{
|
| 13609 |
+
"epoch": 0.7925340990667624,
|
| 13610 |
+
"grad_norm": 0.00036453409120440483,
|
| 13611 |
+
"learning_rate": 0.00016817300855089817,
|
| 13612 |
+
"loss": 0.0,
|
| 13613 |
+
"step": 1932
|
| 13614 |
+
},
|
| 13615 |
+
{
|
| 13616 |
+
"epoch": 0.7929443134037535,
|
| 13617 |
+
"grad_norm": 0.00038114868220873177,
|
| 13618 |
+
"learning_rate": 0.00016814143509888947,
|
| 13619 |
+
"loss": 0.0,
|
| 13620 |
+
"step": 1933
|
| 13621 |
+
},
|
| 13622 |
+
{
|
| 13623 |
+
"epoch": 0.7933545277407446,
|
| 13624 |
+
"grad_norm": 0.00035392382415011525,
|
| 13625 |
+
"learning_rate": 0.00016810984896075676,
|
| 13626 |
+
"loss": 0.0,
|
| 13627 |
+
"step": 1934
|
| 13628 |
+
},
|
| 13629 |
+
{
|
| 13630 |
+
"epoch": 0.7937647420777356,
|
| 13631 |
+
"grad_norm": 0.0007073191227391362,
|
| 13632 |
+
"learning_rate": 0.00016807825014238055,
|
| 13633 |
+
"loss": 0.0,
|
| 13634 |
+
"step": 1935
|
| 13635 |
+
},
|
| 13636 |
+
{
|
| 13637 |
+
"epoch": 0.7941749564147267,
|
| 13638 |
+
"grad_norm": 0.009715719148516655,
|
| 13639 |
+
"learning_rate": 0.0001680466386496437,
|
| 13640 |
+
"loss": 0.0003,
|
| 13641 |
+
"step": 1936
|
| 13642 |
+
},
|
| 13643 |
+
{
|
| 13644 |
+
"epoch": 0.7945851707517178,
|
| 13645 |
+
"grad_norm": 0.00023608510673511773,
|
| 13646 |
+
"learning_rate": 0.00016801501448843143,
|
| 13647 |
+
"loss": 0.0,
|
| 13648 |
+
"step": 1937
|
| 13649 |
+
},
|
| 13650 |
+
{
|
| 13651 |
+
"epoch": 0.7949953850887088,
|
| 13652 |
+
"grad_norm": 0.016205789521336555,
|
| 13653 |
+
"learning_rate": 0.00016798337766463128,
|
| 13654 |
+
"loss": 0.0002,
|
| 13655 |
+
"step": 1938
|
| 13656 |
+
},
|
| 13657 |
+
{
|
| 13658 |
+
"epoch": 0.7954055994256999,
|
| 13659 |
+
"grad_norm": 0.00014060101239010692,
|
| 13660 |
+
"learning_rate": 0.00016795172818413322,
|
| 13661 |
+
"loss": 0.0,
|
| 13662 |
+
"step": 1939
|
| 13663 |
+
},
|
| 13664 |
+
{
|
| 13665 |
+
"epoch": 0.7958158137626911,
|
| 13666 |
+
"grad_norm": 0.00040802141302265227,
|
| 13667 |
+
"learning_rate": 0.00016792006605282955,
|
| 13668 |
+
"loss": 0.0,
|
| 13669 |
+
"step": 1940
|
| 13670 |
+
},
|
| 13671 |
+
{
|
| 13672 |
+
"epoch": 0.7962260280996821,
|
| 13673 |
+
"grad_norm": 0.0005256585427559912,
|
| 13674 |
+
"learning_rate": 0.0001678883912766149,
|
| 13675 |
+
"loss": 0.0,
|
| 13676 |
+
"step": 1941
|
| 13677 |
+
},
|
| 13678 |
+
{
|
| 13679 |
+
"epoch": 0.7966362424366732,
|
| 13680 |
+
"grad_norm": 0.0001477671175962314,
|
| 13681 |
+
"learning_rate": 0.0001678567038613863,
|
| 13682 |
+
"loss": 0.0,
|
| 13683 |
+
"step": 1942
|
| 13684 |
+
},
|
| 13685 |
+
{
|
| 13686 |
+
"epoch": 0.7970464567736643,
|
| 13687 |
+
"grad_norm": 0.00040902665932662785,
|
| 13688 |
+
"learning_rate": 0.00016782500381304303,
|
| 13689 |
+
"loss": 0.0,
|
| 13690 |
+
"step": 1943
|
| 13691 |
+
},
|
| 13692 |
+
{
|
| 13693 |
+
"epoch": 0.7974566711106553,
|
| 13694 |
+
"grad_norm": 0.00044157792581245303,
|
| 13695 |
+
"learning_rate": 0.00016779329113748688,
|
| 13696 |
+
"loss": 0.0,
|
| 13697 |
+
"step": 1944
|
| 13698 |
+
},
|
| 13699 |
+
{
|
| 13700 |
+
"epoch": 0.7978668854476464,
|
| 13701 |
+
"grad_norm": 0.0011935592629015446,
|
| 13702 |
+
"learning_rate": 0.00016776156584062186,
|
| 13703 |
+
"loss": 0.0001,
|
| 13704 |
+
"step": 1945
|
| 13705 |
+
},
|
| 13706 |
+
{
|
| 13707 |
+
"epoch": 0.7982770997846375,
|
| 13708 |
+
"grad_norm": 0.04246418550610542,
|
| 13709 |
+
"learning_rate": 0.00016772982792835438,
|
| 13710 |
+
"loss": 0.0005,
|
| 13711 |
+
"step": 1946
|
| 13712 |
+
},
|
| 13713 |
+
{
|
| 13714 |
+
"epoch": 0.7986873141216285,
|
| 13715 |
+
"grad_norm": 0.0004147049330640584,
|
| 13716 |
+
"learning_rate": 0.0001676980774065932,
|
| 13717 |
+
"loss": 0.0,
|
| 13718 |
+
"step": 1947
|
| 13719 |
+
},
|
| 13720 |
+
{
|
| 13721 |
+
"epoch": 0.7990975284586196,
|
| 13722 |
+
"grad_norm": 0.00015836721286177635,
|
| 13723 |
+
"learning_rate": 0.00016766631428124948,
|
| 13724 |
+
"loss": 0.0,
|
| 13725 |
+
"step": 1948
|
| 13726 |
+
},
|
| 13727 |
+
{
|
| 13728 |
+
"epoch": 0.7995077427956107,
|
| 13729 |
+
"grad_norm": 0.001112595316953957,
|
| 13730 |
+
"learning_rate": 0.0001676345385582366,
|
| 13731 |
+
"loss": 0.0,
|
| 13732 |
+
"step": 1949
|
| 13733 |
+
},
|
| 13734 |
+
{
|
| 13735 |
+
"epoch": 0.7999179571326018,
|
| 13736 |
+
"grad_norm": 0.00028212351026013494,
|
| 13737 |
+
"learning_rate": 0.00016760275024347036,
|
| 13738 |
+
"loss": 0.0,
|
| 13739 |
+
"step": 1950
|
| 13740 |
+
},
|
| 13741 |
+
{
|
| 13742 |
+
"epoch": 0.8003281714695929,
|
| 13743 |
+
"grad_norm": 0.00014074040518607944,
|
| 13744 |
+
"learning_rate": 0.0001675709493428689,
|
| 13745 |
+
"loss": 0.0,
|
| 13746 |
+
"step": 1951
|
| 13747 |
+
},
|
| 13748 |
+
{
|
| 13749 |
+
"epoch": 0.800738385806584,
|
| 13750 |
+
"grad_norm": 0.00035598076647147536,
|
| 13751 |
+
"learning_rate": 0.00016753913586235272,
|
| 13752 |
+
"loss": 0.0,
|
| 13753 |
+
"step": 1952
|
| 13754 |
+
},
|
| 13755 |
+
{
|
| 13756 |
+
"epoch": 0.801148600143575,
|
| 13757 |
+
"grad_norm": 0.27851057052612305,
|
| 13758 |
+
"learning_rate": 0.00016750730980784467,
|
| 13759 |
+
"loss": 0.0432,
|
| 13760 |
+
"step": 1953
|
| 13761 |
+
},
|
| 13762 |
+
{
|
| 13763 |
+
"epoch": 0.8015588144805661,
|
| 13764 |
+
"grad_norm": 0.002487306483089924,
|
| 13765 |
+
"learning_rate": 0.00016747547118526988,
|
| 13766 |
+
"loss": 0.0001,
|
| 13767 |
+
"step": 1954
|
| 13768 |
+
},
|
| 13769 |
+
{
|
| 13770 |
+
"epoch": 0.8019690288175572,
|
| 13771 |
+
"grad_norm": 0.03598805144429207,
|
| 13772 |
+
"learning_rate": 0.00016744362000055584,
|
| 13773 |
+
"loss": 0.0007,
|
| 13774 |
+
"step": 1955
|
| 13775 |
+
},
|
| 13776 |
+
{
|
| 13777 |
+
"epoch": 0.8023792431545482,
|
| 13778 |
+
"grad_norm": 0.0003730082244146615,
|
| 13779 |
+
"learning_rate": 0.0001674117562596324,
|
| 13780 |
+
"loss": 0.0,
|
| 13781 |
+
"step": 1956
|
| 13782 |
+
},
|
| 13783 |
+
{
|
| 13784 |
+
"epoch": 0.8027894574915393,
|
| 13785 |
+
"grad_norm": 8.396896009799093e-05,
|
| 13786 |
+
"learning_rate": 0.00016737987996843178,
|
| 13787 |
+
"loss": 0.0,
|
| 13788 |
+
"step": 1957
|
| 13789 |
+
},
|
| 13790 |
+
{
|
| 13791 |
+
"epoch": 0.8031996718285304,
|
| 13792 |
+
"grad_norm": 0.000370534515241161,
|
| 13793 |
+
"learning_rate": 0.00016734799113288847,
|
| 13794 |
+
"loss": 0.0,
|
| 13795 |
+
"step": 1958
|
| 13796 |
+
},
|
| 13797 |
+
{
|
| 13798 |
+
"epoch": 0.8036098861655215,
|
| 13799 |
+
"grad_norm": 0.00038109999150037766,
|
| 13800 |
+
"learning_rate": 0.00016731608975893933,
|
| 13801 |
+
"loss": 0.0,
|
| 13802 |
+
"step": 1959
|
| 13803 |
+
},
|
| 13804 |
+
{
|
| 13805 |
+
"epoch": 0.8040201005025126,
|
| 13806 |
+
"grad_norm": 0.0010175567585974932,
|
| 13807 |
+
"learning_rate": 0.00016728417585252356,
|
| 13808 |
+
"loss": 0.0001,
|
| 13809 |
+
"step": 1960
|
| 13810 |
+
},
|
| 13811 |
+
{
|
| 13812 |
+
"epoch": 0.8044303148395037,
|
| 13813 |
+
"grad_norm": 0.0007487550028599799,
|
| 13814 |
+
"learning_rate": 0.00016725224941958265,
|
| 13815 |
+
"loss": 0.0,
|
| 13816 |
+
"step": 1961
|
| 13817 |
+
},
|
| 13818 |
+
{
|
| 13819 |
+
"epoch": 0.8048405291764947,
|
| 13820 |
+
"grad_norm": 0.007250356953591108,
|
| 13821 |
+
"learning_rate": 0.00016722031046606045,
|
| 13822 |
+
"loss": 0.0002,
|
| 13823 |
+
"step": 1962
|
| 13824 |
+
},
|
| 13825 |
+
{
|
| 13826 |
+
"epoch": 0.8052507435134858,
|
| 13827 |
+
"grad_norm": 0.00026679731672629714,
|
| 13828 |
+
"learning_rate": 0.00016718835899790318,
|
| 13829 |
+
"loss": 0.0,
|
| 13830 |
+
"step": 1963
|
| 13831 |
+
},
|
| 13832 |
+
{
|
| 13833 |
+
"epoch": 0.8056609578504769,
|
| 13834 |
+
"grad_norm": 0.0017098303651437163,
|
| 13835 |
+
"learning_rate": 0.00016715639502105938,
|
| 13836 |
+
"loss": 0.0001,
|
| 13837 |
+
"step": 1964
|
| 13838 |
+
},
|
| 13839 |
+
{
|
| 13840 |
+
"epoch": 0.8060711721874679,
|
| 13841 |
+
"grad_norm": 0.3836878836154938,
|
| 13842 |
+
"learning_rate": 0.0001671244185414798,
|
| 13843 |
+
"loss": 0.0059,
|
| 13844 |
+
"step": 1965
|
| 13845 |
+
},
|
| 13846 |
+
{
|
| 13847 |
+
"epoch": 0.806481386524459,
|
| 13848 |
+
"grad_norm": 0.004248309414833784,
|
| 13849 |
+
"learning_rate": 0.00016709242956511767,
|
| 13850 |
+
"loss": 0.0001,
|
| 13851 |
+
"step": 1966
|
| 13852 |
+
},
|
| 13853 |
+
{
|
| 13854 |
+
"epoch": 0.8068916008614501,
|
| 13855 |
+
"grad_norm": 0.002994704060256481,
|
| 13856 |
+
"learning_rate": 0.0001670604280979285,
|
| 13857 |
+
"loss": 0.0001,
|
| 13858 |
+
"step": 1967
|
| 13859 |
+
},
|
| 13860 |
+
{
|
| 13861 |
+
"epoch": 0.8073018151984411,
|
| 13862 |
+
"grad_norm": 0.00019125927065033466,
|
| 13863 |
+
"learning_rate": 0.0001670284141458701,
|
| 13864 |
+
"loss": 0.0,
|
| 13865 |
+
"step": 1968
|
| 13866 |
+
},
|
| 13867 |
+
{
|
| 13868 |
+
"epoch": 0.8077120295354323,
|
| 13869 |
+
"grad_norm": 0.0035772384144365788,
|
| 13870 |
+
"learning_rate": 0.00016699638771490257,
|
| 13871 |
+
"loss": 0.0001,
|
| 13872 |
+
"step": 1969
|
| 13873 |
+
},
|
| 13874 |
+
{
|
| 13875 |
+
"epoch": 0.8081222438724234,
|
| 13876 |
+
"grad_norm": 0.24614709615707397,
|
| 13877 |
+
"learning_rate": 0.00016696434881098844,
|
| 13878 |
+
"loss": 0.002,
|
| 13879 |
+
"step": 1970
|
| 13880 |
+
},
|
| 13881 |
+
{
|
| 13882 |
+
"epoch": 0.8085324582094144,
|
| 13883 |
+
"grad_norm": 0.00042936106910929084,
|
| 13884 |
+
"learning_rate": 0.00016693229744009247,
|
| 13885 |
+
"loss": 0.0,
|
| 13886 |
+
"step": 1971
|
| 13887 |
+
},
|
| 13888 |
+
{
|
| 13889 |
+
"epoch": 0.8089426725464055,
|
| 13890 |
+
"grad_norm": 0.0027135710697621107,
|
| 13891 |
+
"learning_rate": 0.00016690023360818178,
|
| 13892 |
+
"loss": 0.0001,
|
| 13893 |
+
"step": 1972
|
| 13894 |
+
},
|
| 13895 |
+
{
|
| 13896 |
+
"epoch": 0.8093528868833966,
|
| 13897 |
+
"grad_norm": 0.0020951367914676666,
|
| 13898 |
+
"learning_rate": 0.00016686815732122582,
|
| 13899 |
+
"loss": 0.0001,
|
| 13900 |
+
"step": 1973
|
| 13901 |
+
},
|
| 13902 |
+
{
|
| 13903 |
+
"epoch": 0.8097631012203876,
|
| 13904 |
+
"grad_norm": 0.0015644457889720798,
|
| 13905 |
+
"learning_rate": 0.00016683606858519635,
|
| 13906 |
+
"loss": 0.0001,
|
| 13907 |
+
"step": 1974
|
| 13908 |
+
},
|
| 13909 |
+
{
|
| 13910 |
+
"epoch": 0.8101733155573787,
|
| 13911 |
+
"grad_norm": 0.0009489025687798858,
|
| 13912 |
+
"learning_rate": 0.00016680396740606737,
|
| 13913 |
+
"loss": 0.0001,
|
| 13914 |
+
"step": 1975
|
| 13915 |
+
},
|
| 13916 |
+
{
|
| 13917 |
+
"epoch": 0.8105835298943698,
|
| 13918 |
+
"grad_norm": 0.017102990299463272,
|
| 13919 |
+
"learning_rate": 0.00016677185378981534,
|
| 13920 |
+
"loss": 0.0002,
|
| 13921 |
+
"step": 1976
|
| 13922 |
+
},
|
| 13923 |
+
{
|
| 13924 |
+
"epoch": 0.8109937442313608,
|
| 13925 |
+
"grad_norm": 0.0028788631316274405,
|
| 13926 |
+
"learning_rate": 0.00016673972774241896,
|
| 13927 |
+
"loss": 0.0001,
|
| 13928 |
+
"step": 1977
|
| 13929 |
+
},
|
| 13930 |
+
{
|
| 13931 |
+
"epoch": 0.811403958568352,
|
| 13932 |
+
"grad_norm": 0.0013644604478031397,
|
| 13933 |
+
"learning_rate": 0.00016670758926985921,
|
| 13934 |
+
"loss": 0.0001,
|
| 13935 |
+
"step": 1978
|
| 13936 |
+
},
|
| 13937 |
+
{
|
| 13938 |
+
"epoch": 0.8118141729053431,
|
| 13939 |
+
"grad_norm": 0.020865008234977722,
|
| 13940 |
+
"learning_rate": 0.00016667543837811942,
|
| 13941 |
+
"loss": 0.0003,
|
| 13942 |
+
"step": 1979
|
| 13943 |
+
},
|
| 13944 |
+
{
|
| 13945 |
+
"epoch": 0.8122243872423341,
|
| 13946 |
+
"grad_norm": 0.0012545837089419365,
|
| 13947 |
+
"learning_rate": 0.00016664327507318527,
|
| 13948 |
+
"loss": 0.0001,
|
| 13949 |
+
"step": 1980
|
| 13950 |
+
},
|
| 13951 |
+
{
|
| 13952 |
+
"epoch": 0.8126346015793252,
|
| 13953 |
+
"grad_norm": 0.00042665336513891816,
|
| 13954 |
+
"learning_rate": 0.00016661109936104464,
|
| 13955 |
+
"loss": 0.0,
|
| 13956 |
+
"step": 1981
|
| 13957 |
+
},
|
| 13958 |
+
{
|
| 13959 |
+
"epoch": 0.8130448159163163,
|
| 13960 |
+
"grad_norm": 0.0010399249149486423,
|
| 13961 |
+
"learning_rate": 0.00016657891124768787,
|
| 13962 |
+
"loss": 0.0001,
|
| 13963 |
+
"step": 1982
|
| 13964 |
+
},
|
| 13965 |
+
{
|
| 13966 |
+
"epoch": 0.8134550302533073,
|
| 13967 |
+
"grad_norm": 0.000738332630135119,
|
| 13968 |
+
"learning_rate": 0.00016654671073910752,
|
| 13969 |
+
"loss": 0.0001,
|
| 13970 |
+
"step": 1983
|
| 13971 |
+
},
|
| 13972 |
+
{
|
| 13973 |
+
"epoch": 0.8138652445902984,
|
| 13974 |
+
"grad_norm": 0.0011064078425988555,
|
| 13975 |
+
"learning_rate": 0.00016651449784129842,
|
| 13976 |
+
"loss": 0.0001,
|
| 13977 |
+
"step": 1984
|
| 13978 |
+
},
|
| 13979 |
+
{
|
| 13980 |
+
"epoch": 0.8142754589272895,
|
| 13981 |
+
"grad_norm": 0.0005077007226645947,
|
| 13982 |
+
"learning_rate": 0.00016648227256025778,
|
| 13983 |
+
"loss": 0.0,
|
| 13984 |
+
"step": 1985
|
| 13985 |
+
},
|
| 13986 |
+
{
|
| 13987 |
+
"epoch": 0.8146856732642805,
|
| 13988 |
+
"grad_norm": 0.0009330844040960073,
|
| 13989 |
+
"learning_rate": 0.00016645003490198512,
|
| 13990 |
+
"loss": 0.0,
|
| 13991 |
+
"step": 1986
|
| 13992 |
+
},
|
| 13993 |
+
{
|
| 13994 |
+
"epoch": 0.8150958876012717,
|
| 13995 |
+
"grad_norm": 0.000574357807636261,
|
| 13996 |
+
"learning_rate": 0.0001664177848724822,
|
| 13997 |
+
"loss": 0.0,
|
| 13998 |
+
"step": 1987
|
| 13999 |
+
},
|
| 14000 |
+
{
|
| 14001 |
+
"epoch": 0.8155061019382628,
|
| 14002 |
+
"grad_norm": 0.0004102467210032046,
|
| 14003 |
+
"learning_rate": 0.00016638552247775313,
|
| 14004 |
+
"loss": 0.0,
|
| 14005 |
+
"step": 1988
|
| 14006 |
+
},
|
| 14007 |
+
{
|
| 14008 |
+
"epoch": 0.8159163162752539,
|
| 14009 |
+
"grad_norm": 0.0002829334116540849,
|
| 14010 |
+
"learning_rate": 0.0001663532477238043,
|
| 14011 |
+
"loss": 0.0,
|
| 14012 |
+
"step": 1989
|
| 14013 |
+
},
|
| 14014 |
+
{
|
| 14015 |
+
"epoch": 0.8163265306122449,
|
| 14016 |
+
"grad_norm": 0.007908416911959648,
|
| 14017 |
+
"learning_rate": 0.0001663209606166444,
|
| 14018 |
+
"loss": 0.0001,
|
| 14019 |
+
"step": 1990
|
| 14020 |
+
},
|
| 14021 |
+
{
|
| 14022 |
+
"epoch": 0.816736744949236,
|
| 14023 |
+
"grad_norm": 0.0004376998113002628,
|
| 14024 |
+
"learning_rate": 0.00016628866116228448,
|
| 14025 |
+
"loss": 0.0,
|
| 14026 |
+
"step": 1991
|
| 14027 |
+
},
|
| 14028 |
+
{
|
| 14029 |
+
"epoch": 0.817146959286227,
|
| 14030 |
+
"grad_norm": 0.360516756772995,
|
| 14031 |
+
"learning_rate": 0.0001662563493667378,
|
| 14032 |
+
"loss": 0.0169,
|
| 14033 |
+
"step": 1992
|
| 14034 |
+
},
|
| 14035 |
+
{
|
| 14036 |
+
"epoch": 0.8175571736232181,
|
| 14037 |
+
"grad_norm": 0.0010267659090459347,
|
| 14038 |
+
"learning_rate": 0.00016622402523602,
|
| 14039 |
+
"loss": 0.0001,
|
| 14040 |
+
"step": 1993
|
| 14041 |
+
},
|
| 14042 |
+
{
|
| 14043 |
+
"epoch": 0.8179673879602092,
|
| 14044 |
+
"grad_norm": 0.0007340334123000503,
|
| 14045 |
+
"learning_rate": 0.0001661916887761489,
|
| 14046 |
+
"loss": 0.0,
|
| 14047 |
+
"step": 1994
|
| 14048 |
+
},
|
| 14049 |
+
{
|
| 14050 |
+
"epoch": 0.8183776022972002,
|
| 14051 |
+
"grad_norm": 0.00018298997019883245,
|
| 14052 |
+
"learning_rate": 0.00016615933999314476,
|
| 14053 |
+
"loss": 0.0,
|
| 14054 |
+
"step": 1995
|
| 14055 |
+
},
|
| 14056 |
+
{
|
| 14057 |
+
"epoch": 0.8187878166341913,
|
| 14058 |
+
"grad_norm": 0.0025684500578790903,
|
| 14059 |
+
"learning_rate": 0.00016612697889303,
|
| 14060 |
+
"loss": 0.0,
|
| 14061 |
+
"step": 1996
|
| 14062 |
+
},
|
| 14063 |
+
{
|
| 14064 |
+
"epoch": 0.8191980309711825,
|
| 14065 |
+
"grad_norm": 0.000144028032082133,
|
| 14066 |
+
"learning_rate": 0.00016609460548182948,
|
| 14067 |
+
"loss": 0.0,
|
| 14068 |
+
"step": 1997
|
| 14069 |
+
},
|
| 14070 |
+
{
|
| 14071 |
+
"epoch": 0.8196082453081736,
|
| 14072 |
+
"grad_norm": 0.0007032654830254614,
|
| 14073 |
+
"learning_rate": 0.0001660622197655702,
|
| 14074 |
+
"loss": 0.0001,
|
| 14075 |
+
"step": 1998
|
| 14076 |
+
},
|
| 14077 |
+
{
|
| 14078 |
+
"epoch": 0.8200184596451646,
|
| 14079 |
+
"grad_norm": 0.0002601140004117042,
|
| 14080 |
+
"learning_rate": 0.00016602982175028156,
|
| 14081 |
+
"loss": 0.0,
|
| 14082 |
+
"step": 1999
|
| 14083 |
+
},
|
| 14084 |
+
{
|
| 14085 |
+
"epoch": 0.8204286739821557,
|
| 14086 |
+
"grad_norm": 0.0005366207915358245,
|
| 14087 |
+
"learning_rate": 0.00016599741144199517,
|
| 14088 |
+
"loss": 0.0,
|
| 14089 |
+
"step": 2000
|
| 14090 |
+
},
|
| 14091 |
+
{
|
| 14092 |
+
"epoch": 0.8204286739821557,
|
| 14093 |
+
"eval_loss": 2.5830555387074128e-05,
|
| 14094 |
+
"eval_runtime": 11.1601,
|
| 14095 |
+
"eval_samples_per_second": 17.563,
|
| 14096 |
+
"eval_steps_per_second": 4.391,
|
| 14097 |
+
"step": 2000
|
   }
  ],
  "logging_steps": 1,
@@ -12715,7 +14123,7 @@
    "attributes": {}
   }
  },
- "total_flos": 1.
+ "total_flos": 1.3240893372471706e+17,
 "train_batch_size": 4,
 "trial_name": null,
 "trial_params": null
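Of the files in this checkpoint, trainer_state.json is the only one that is plain JSON: the header fields record best_metric, best_model_checkpoint, epoch and global_step, and log_history holds one record per logged training step (epoch, grad_norm, learning_rate, loss) plus the eval record written every 200 steps. A minimal sketch for inspecting a local copy of this checkpoint:

    import json

    with open("last-checkpoint/trainer_state.json") as f:
        state = json.load(f)

    print(state["global_step"])            # 2000
    print(state["best_metric"])            # 2.5830555387074128e-05 (best eval_loss)
    print(state["best_model_checkpoint"])  # miner_id_24/checkpoint-2000

    # Training-step records and eval records share the log_history list.
    steps = [e for e in state["log_history"] if "loss" in e]
    evals = [e for e in state["log_history"] if "eval_loss" in e]
    print(len(steps), "logged steps;", len(evals), "evaluations")
    print(evals[-1])  # the step-2000 eval record shown at the end of this diff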