Training in progress, step 20000, checkpoint

Changed files:
- last-checkpoint/global_step20000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt (+2 -2)
- last-checkpoint/global_step20000/mp_rank_00_model_states.pt (+2 -2)
- last-checkpoint/latest (+1 -1)
- last-checkpoint/model.safetensors (+1 -1)
- last-checkpoint/rng_state.pth (+1 -1)
- last-checkpoint/scheduler.pt (+1 -1)
- last-checkpoint/trainer_state.json (+876 -6)
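This looks like the routine checkpoint push a Hugging Face `Trainer` run makes while training: DeepSpeed optimizer/model shards under `global_step20000/`, a consolidated `model.safetensors`, RNG and LR-scheduler state, and an updated `trainer_state.json`. A minimal sketch for pulling just this folder from the Hub and listing what arrived; the repository id is not shown on this page, so `REPO_ID` below is a placeholder.

```python
# Minimal sketch: download only the checkpoint folder from the Hub and list it.
# REPO_ID is a placeholder -- the repository name is not visible on this page.
import os
from huggingface_hub import snapshot_download

REPO_ID = "user/model"  # hypothetical; replace with the actual repo id

local_dir = snapshot_download(repo_id=REPO_ID, allow_patterns=["last-checkpoint/*"])

ckpt = os.path.join(local_dir, "last-checkpoint")
for name in sorted(os.listdir(ckpt)):
    full = os.path.join(ckpt, name)
    size = os.path.getsize(full) if os.path.isfile(full) else "(dir)"
    print(name, size)
```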
last-checkpoint/global_step20000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9867ea7f15881e7bed68bb3b7781fc3c4f5646e0a9aec63231d97c009a1c403f
+size 5117197020
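Each of the large files in this commit is tracked through Git LFS, so the repository itself only stores the three-line pointer shown in the diff (spec version, `oid sha256:`, `size`); the ~5.1 GB blob of ZeRO optimizer state lives in LFS storage. A small sketch of reading such a pointer, assuming the file on disk is still the pointer text (i.e. the LFS blob has not been pulled); the path is the one from this diff.

```python
# Small sketch: parse a Git LFS pointer file into its key/value fields.
def parse_lfs_pointer(path):
    fields = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = parse_lfs_pointer(
    "last-checkpoint/global_step20000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt"
)
print(ptr["oid"])   # "sha256:9867ea7f...403f"
print(ptr["size"])  # "5117197020" bytes of optimizer state
```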
last-checkpoint/global_step20000/mp_rank_00_model_states.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:49d8bf456bdf7cfa2a6fed84991a6fc983b6fea67864bf0474df258a8f8c7541
+size 859127504
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-
+global_step20000
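`latest` is DeepSpeed's bookkeeping file: it names the tag directory (`global_step20000`) that holds the current partitioned optimizer and model states. If full-precision weights are needed outside DeepSpeed, the usual route is DeepSpeed's `zero_to_fp32` helper, which reads that tag. A rough sketch, assuming `deepspeed` is installed, the LFS blobs have actually been pulled, and this checkpoint layout is supported by the installed DeepSpeed version.

```python
# Rough sketch: reconstruct an fp32 state_dict from the DeepSpeed shards.
# The tag argument is optional; by default the helper reads the "latest" file,
# which this commit points at "global_step20000".
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

state_dict = get_fp32_state_dict_from_zero_checkpoint(
    "last-checkpoint", tag="global_step20000"
)
total = sum(t.numel() for t in state_dict.values())
print(f"{len(state_dict)} tensors, {total / 1e6:.1f}M parameters")
```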
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:069fbc2b96ff55558de2b6621d0406b4fbcbc7edffe8d2472bb8b992e0abdb14
 size 962205216
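`model.safetensors` appears to be the consolidated copy of the weights that `Trainer` writes next to the DeepSpeed shards; the pointer reports the same ~962 MB size as before with new contents. It can be inspected without any model class. A minimal sketch, assuming the blob has been pulled locally.

```python
# Minimal sketch: open the consolidated weights and print a few tensor shapes.
from safetensors.torch import load_file

state_dict = load_file("last-checkpoint/model.safetensors")
for name in list(state_dict)[:5]:
    t = state_dict[name]
    print(name, tuple(t.shape), t.dtype)
print(f"total tensors: {len(state_dict)}")
```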
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ef2e75134b208d60f6f9b30cef29e49813797dfcda4ce7d7e2cabca76bb3fa47
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:29c7a79b53a589de48d3b7a21df9c0d024be4dea79f68869f72fdc01ae3b212a
 size 1064
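`rng_state.pth` and `scheduler.pt` are small ordinary PyTorch pickles holding the random-number-generator snapshots and the LR-scheduler `state_dict`; they are what lets `resume_from_checkpoint` continue deterministically. A quick sketch for peeking at them; `weights_only=False` is needed because they contain plain Python objects, so only do this for files you trust.

```python
# Quick sketch: inspect the bookkeeping files saved alongside the weights.
import torch

rng = torch.load("last-checkpoint/rng_state.pth", weights_only=False)
sched = torch.load("last-checkpoint/scheduler.pt", weights_only=False)

print(sorted(rng.keys()))  # typically python / numpy / cpu / cuda RNG states
print(sched)               # e.g. last_epoch and _last_lr of the linear-decay schedule
```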
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 82.
-  "best_model_checkpoint": "./iteboshi_temp/checkpoint-
-  "epoch":
+  "best_metric": 82.34794908062236,
+  "best_model_checkpoint": "./iteboshi_temp/checkpoint-20000",
+  "epoch": 22.026431718061673,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 20000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4937,6 +4937,876 @@
     "eval_steps_per_second": 1.554,
     "eval_wer": 82.998585572843,
     "step": 17000
+    },
+    [... 120 added training-log entries, one every 25 steps from step 17025 (loss 0.0023, learning_rate 3.05e-06) through step 20000 (loss 0.0006, learning_rate 0.0), grad_norm ranging roughly 0.003 to 0.15 as the linear learning-rate decay runs out; the three evaluation entries from this range follow ...]
+    {
+      "epoch": 19.823788546255507,
+      "eval_cer": 22.66344158747263,
+      "eval_loss": 0.8900153040885925,
+      "eval_runtime": 1717.0751,
+      "eval_samples_per_second": 6.162,
+      "eval_steps_per_second": 1.541,
+      "eval_wer": 82.50825082508251,
+      "step": 18000
+    },
+    {
+      "epoch": 20.92511013215859,
+      "eval_cer": 22.58778214666468,
+      "eval_loss": 0.892371654510498,
+      "eval_runtime": 1719.93,
+      "eval_samples_per_second": 6.152,
+      "eval_steps_per_second": 1.538,
+      "eval_wer": 82.47996228194248,
+      "step": 19000
+    },
+    {
+      "epoch": 22.026431718061673,
+      "eval_cer": 22.62675822223241,
+      "eval_loss": 0.8947405219078064,
+      "eval_runtime": 1706.5603,
+      "eval_samples_per_second": 6.2,
+      "eval_steps_per_second": 1.55,
+      "eval_wer": 82.34794908062236,
+      "step": 20000
     }
   ],
   "logging_steps": 25,
@@ -4951,12 +5821,12 @@
   "should_evaluate": false,
   "should_log": false,
   "should_save": true,
-  "should_training_stop":
+  "should_training_stop": true
   },
   "attributes": {}
   }
   },
-  "total_flos":
+  "total_flos": 3.4362863729801953e+20,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
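`trainer_state.json` is where these numbers are easiest to read back: `Trainer` keeps the whole run history in a `log_history` list alongside `best_metric`, `best_model_checkpoint`, `epoch` and `global_step`. A short sketch that reprints the headline fields and the evaluation rows added in this commit (WER 82.51, 82.48 and 82.35 at steps 18000, 19000 and 20000).

```python
# Short sketch: read back the metrics recorded in trainer_state.json.
import json

with open("last-checkpoint/trainer_state.json", encoding="utf-8") as f:
    state = json.load(f)

print("best_metric:", state["best_metric"])                # 82.34794908062236
print("best checkpoint:", state["best_model_checkpoint"])  # ./iteboshi_temp/checkpoint-20000
print("global_step:", state["global_step"], "epoch:", state["epoch"])

for row in state["log_history"]:
    if "eval_wer" in row:  # evaluation entries only (every 1000 steps)
        print(row["step"], "WER", row["eval_wer"], "CER", row["eval_cer"], "loss", row["eval_loss"])
```

Since `should_training_stop` is now `true` and the learning rate has decayed to zero at step 20000, this checkpoint marks the end of the scheduled run rather than an intermediate point to resume from.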