"auto-commit"
Browse files- model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/config.json +0 -0
- model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/optimizer.pt +1 -1
- model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/preprocessor_config.json +0 -0
- model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/pytorch_model.bin +1 -1
- model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/rng_state.pth +2 -2
- model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/scaler.pt +1 -1
- model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/scheduler.pt +1 -1
- model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/trainer_state.json +799 -4
- model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/training_args.bin +0 -0
- model-bin/finetune/base/log/1629894485.228881/events.out.tfevents.1629894485.7e498afd5545.7645.25 +3 -0
- model-bin/finetune/base/log/1629894958.0897994/events.out.tfevents.1629894958.7e498afd5545.7645.27 +3 -0
- model-bin/finetune/base/log/1629895430.286524/events.out.tfevents.1629895430.7e498afd5545.7645.29 +3 -0
- model-bin/finetune/base/log/1629895902.062021/events.out.tfevents.1629895902.7e498afd5545.7645.31 +3 -0
- model-bin/finetune/base/log/1629896362.1823292/events.out.tfevents.1629896362.7e498afd5545.7645.33 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629894485.7e498afd5545.7645.24 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629894957.7e498afd5545.7645.26 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629895430.7e498afd5545.7645.28 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629895902.7e498afd5545.7645.30 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629896362.7e498afd5545.7645.32 +3 -0
model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/config.json
RENAMED
|
File without changes
|
model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/optimizer.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 722165393
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61a8b0a7adecf1a8e9ffdcbc7ea1d0c430e0f4915a37015f6f24f65a184c4dcf
|
| 3 |
size 722165393
|
model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/preprocessor_config.json
RENAMED
|
File without changes
|
model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/pytorch_model.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 377909911
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8ed4f9f480dc04b7c1b9a03c96d135c3ea038740074fc4dbb931f7003319cc2
|
| 3 |
size 377909911
|
model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/rng_state.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08c2647acfb5b49bae969d7133c47291a13692196c268162e693bd3db7b0b8e5
|
| 3 |
+
size 14439
|
model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/scaler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4dedda88080e30bde75dd07d24e3957d07cf65d683f3fcca5da54a4d262734ab
|
| 3 |
size 559
|
model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/scheduler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c94b37533bc725b49922bd0401ee468e8837e745c87213756a34d1c1ab20c1c
|
| 3 |
size 623
|
model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/trainer_state.json
RENAMED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.18412114350410416,
|
| 3 |
"best_model_checkpoint": "./model-bin/finetune/base/checkpoint-69565",
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -198321,11 +198321,806 @@
|
|
| 198321 |
"eval_steps_per_second": 0.635,
|
| 198322 |
"eval_wer": 0.19297325642370214,
|
| 198323 |
"step": 77779
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198324 |
}
|
| 198325 |
],
|
| 198326 |
-
"max_steps":
|
| 198327 |
"num_train_epochs": 5000,
|
| 198328 |
-
"total_flos": 2.
|
| 198329 |
"trial_name": null,
|
| 198330 |
"trial_params": null
|
| 198331 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.18412114350410416,
|
| 3 |
"best_model_checkpoint": "./model-bin/finetune/base/checkpoint-69565",
|
| 4 |
+
"epoch": 627.0,
|
| 5 |
+
"global_step": 78401,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 198321 |
"eval_steps_per_second": 0.635,
|
| 198322 |
"eval_wer": 0.19297325642370214,
|
| 198323 |
"step": 77779
|
| 198324 |
+
},
|
| 198325 |
+
{
|
| 198326 |
+
"epoch": 627.01,
|
| 198327 |
+
"learning_rate": 8.760080775444265e-06,
|
| 198328 |
+
"loss": 0.4135,
|
| 198329 |
+
"step": 77780
|
| 198330 |
+
},
|
| 198331 |
+
{
|
| 198332 |
+
"epoch": 627.05,
|
| 198333 |
+
"learning_rate": 8.76e-06,
|
| 198334 |
+
"loss": 0.3354,
|
| 198335 |
+
"step": 77785
|
| 198336 |
+
},
|
| 198337 |
+
{
|
| 198338 |
+
"epoch": 627.09,
|
| 198339 |
+
"learning_rate": 8.759919224555737e-06,
|
| 198340 |
+
"loss": 0.2852,
|
| 198341 |
+
"step": 77790
|
| 198342 |
+
},
|
| 198343 |
+
{
|
| 198344 |
+
"epoch": 627.13,
|
| 198345 |
+
"learning_rate": 8.75983844911147e-06,
|
| 198346 |
+
"loss": 0.3816,
|
| 198347 |
+
"step": 77795
|
| 198348 |
+
},
|
| 198349 |
+
{
|
| 198350 |
+
"epoch": 627.17,
|
| 198351 |
+
"learning_rate": 8.759757673667207e-06,
|
| 198352 |
+
"loss": 0.5098,
|
| 198353 |
+
"step": 77800
|
| 198354 |
+
},
|
| 198355 |
+
{
|
| 198356 |
+
"epoch": 627.21,
|
| 198357 |
+
"learning_rate": 8.75967689822294e-06,
|
| 198358 |
+
"loss": 1.0766,
|
| 198359 |
+
"step": 77805
|
| 198360 |
+
},
|
| 198361 |
+
{
|
| 198362 |
+
"epoch": 627.25,
|
| 198363 |
+
"learning_rate": 8.759596122778677e-06,
|
| 198364 |
+
"loss": 0.3371,
|
| 198365 |
+
"step": 77810
|
| 198366 |
+
},
|
| 198367 |
+
{
|
| 198368 |
+
"epoch": 627.29,
|
| 198369 |
+
"learning_rate": 8.75951534733441e-06,
|
| 198370 |
+
"loss": 0.3084,
|
| 198371 |
+
"step": 77815
|
| 198372 |
+
},
|
| 198373 |
+
{
|
| 198374 |
+
"epoch": 627.33,
|
| 198375 |
+
"learning_rate": 8.759434571890147e-06,
|
| 198376 |
+
"loss": 0.3505,
|
| 198377 |
+
"step": 77820
|
| 198378 |
+
},
|
| 198379 |
+
{
|
| 198380 |
+
"epoch": 627.37,
|
| 198381 |
+
"learning_rate": 8.75935379644588e-06,
|
| 198382 |
+
"loss": 0.5241,
|
| 198383 |
+
"step": 77825
|
| 198384 |
+
},
|
| 198385 |
+
{
|
| 198386 |
+
"epoch": 627.41,
|
| 198387 |
+
"learning_rate": 8.759273021001617e-06,
|
| 198388 |
+
"loss": 1.1014,
|
| 198389 |
+
"step": 77830
|
| 198390 |
+
},
|
| 198391 |
+
{
|
| 198392 |
+
"epoch": 627.45,
|
| 198393 |
+
"learning_rate": 8.75919224555735e-06,
|
| 198394 |
+
"loss": 0.3313,
|
| 198395 |
+
"step": 77835
|
| 198396 |
+
},
|
| 198397 |
+
{
|
| 198398 |
+
"epoch": 627.49,
|
| 198399 |
+
"learning_rate": 8.759111470113087e-06,
|
| 198400 |
+
"loss": 0.3149,
|
| 198401 |
+
"step": 77840
|
| 198402 |
+
},
|
| 198403 |
+
{
|
| 198404 |
+
"epoch": 627.53,
|
| 198405 |
+
"learning_rate": 8.759030694668822e-06,
|
| 198406 |
+
"loss": 0.3667,
|
| 198407 |
+
"step": 77845
|
| 198408 |
+
},
|
| 198409 |
+
{
|
| 198410 |
+
"epoch": 627.57,
|
| 198411 |
+
"learning_rate": 8.758949919224557e-06,
|
| 198412 |
+
"loss": 0.5899,
|
| 198413 |
+
"step": 77850
|
| 198414 |
+
},
|
| 198415 |
+
{
|
| 198416 |
+
"epoch": 627.61,
|
| 198417 |
+
"learning_rate": 8.758885298869144e-06,
|
| 198418 |
+
"loss": 1.1831,
|
| 198419 |
+
"step": 77855
|
| 198420 |
+
},
|
| 198421 |
+
{
|
| 198422 |
+
"epoch": 627.65,
|
| 198423 |
+
"learning_rate": 8.75880452342488e-06,
|
| 198424 |
+
"loss": 0.3618,
|
| 198425 |
+
"step": 77860
|
| 198426 |
+
},
|
| 198427 |
+
{
|
| 198428 |
+
"epoch": 627.69,
|
| 198429 |
+
"learning_rate": 8.758723747980614e-06,
|
| 198430 |
+
"loss": 0.3628,
|
| 198431 |
+
"step": 77865
|
| 198432 |
+
},
|
| 198433 |
+
{
|
| 198434 |
+
"epoch": 627.73,
|
| 198435 |
+
"learning_rate": 8.75864297253635e-06,
|
| 198436 |
+
"loss": 0.3418,
|
| 198437 |
+
"step": 77870
|
| 198438 |
+
},
|
| 198439 |
+
{
|
| 198440 |
+
"epoch": 627.77,
|
| 198441 |
+
"learning_rate": 8.758562197092084e-06,
|
| 198442 |
+
"loss": 0.6136,
|
| 198443 |
+
"step": 77875
|
| 198444 |
+
},
|
| 198445 |
+
{
|
| 198446 |
+
"epoch": 627.81,
|
| 198447 |
+
"learning_rate": 8.75848142164782e-06,
|
| 198448 |
+
"loss": 1.1037,
|
| 198449 |
+
"step": 77880
|
| 198450 |
+
},
|
| 198451 |
+
{
|
| 198452 |
+
"epoch": 627.85,
|
| 198453 |
+
"learning_rate": 8.758400646203555e-06,
|
| 198454 |
+
"loss": 0.3263,
|
| 198455 |
+
"step": 77885
|
| 198456 |
+
},
|
| 198457 |
+
{
|
| 198458 |
+
"epoch": 627.89,
|
| 198459 |
+
"learning_rate": 8.75831987075929e-06,
|
| 198460 |
+
"loss": 0.3211,
|
| 198461 |
+
"step": 77890
|
| 198462 |
+
},
|
| 198463 |
+
{
|
| 198464 |
+
"epoch": 627.93,
|
| 198465 |
+
"learning_rate": 8.758239095315025e-06,
|
| 198466 |
+
"loss": 0.3483,
|
| 198467 |
+
"step": 77895
|
| 198468 |
+
},
|
| 198469 |
+
{
|
| 198470 |
+
"epoch": 627.97,
|
| 198471 |
+
"learning_rate": 8.75815831987076e-06,
|
| 198472 |
+
"loss": 0.6433,
|
| 198473 |
+
"step": 77900
|
| 198474 |
+
},
|
| 198475 |
+
{
|
| 198476 |
+
"epoch": 628.0,
|
| 198477 |
+
"eval_loss": 0.3645716905593872,
|
| 198478 |
+
"eval_runtime": 40.2657,
|
| 198479 |
+
"eval_samples_per_second": 20.762,
|
| 198480 |
+
"eval_steps_per_second": 0.671,
|
| 198481 |
+
"eval_wer": 0.19254476078661578,
|
| 198482 |
+
"step": 77903
|
| 198483 |
+
},
|
| 198484 |
+
{
|
| 198485 |
+
"epoch": 628.02,
|
| 198486 |
+
"learning_rate": 8.758077544426495e-06,
|
| 198487 |
+
"loss": 0.4821,
|
| 198488 |
+
"step": 77905
|
| 198489 |
+
},
|
| 198490 |
+
{
|
| 198491 |
+
"epoch": 628.06,
|
| 198492 |
+
"learning_rate": 8.75799676898223e-06,
|
| 198493 |
+
"loss": 0.3112,
|
| 198494 |
+
"step": 77910
|
| 198495 |
+
},
|
| 198496 |
+
{
|
| 198497 |
+
"epoch": 628.1,
|
| 198498 |
+
"learning_rate": 8.757915993537965e-06,
|
| 198499 |
+
"loss": 0.2826,
|
| 198500 |
+
"step": 77915
|
| 198501 |
+
},
|
| 198502 |
+
{
|
| 198503 |
+
"epoch": 628.14,
|
| 198504 |
+
"learning_rate": 8.7578352180937e-06,
|
| 198505 |
+
"loss": 0.4247,
|
| 198506 |
+
"step": 77920
|
| 198507 |
+
},
|
| 198508 |
+
{
|
| 198509 |
+
"epoch": 628.18,
|
| 198510 |
+
"learning_rate": 8.757754442649435e-06,
|
| 198511 |
+
"loss": 0.667,
|
| 198512 |
+
"step": 77925
|
| 198513 |
+
},
|
| 198514 |
+
{
|
| 198515 |
+
"epoch": 628.22,
|
| 198516 |
+
"learning_rate": 8.75767366720517e-06,
|
| 198517 |
+
"loss": 1.0662,
|
| 198518 |
+
"step": 77930
|
| 198519 |
+
},
|
| 198520 |
+
{
|
| 198521 |
+
"epoch": 628.26,
|
| 198522 |
+
"learning_rate": 8.757592891760905e-06,
|
| 198523 |
+
"loss": 0.3532,
|
| 198524 |
+
"step": 77935
|
| 198525 |
+
},
|
| 198526 |
+
{
|
| 198527 |
+
"epoch": 628.3,
|
| 198528 |
+
"learning_rate": 8.757512116316641e-06,
|
| 198529 |
+
"loss": 0.3435,
|
| 198530 |
+
"step": 77940
|
| 198531 |
+
},
|
| 198532 |
+
{
|
| 198533 |
+
"epoch": 628.34,
|
| 198534 |
+
"learning_rate": 8.757431340872375e-06,
|
| 198535 |
+
"loss": 0.3594,
|
| 198536 |
+
"step": 77945
|
| 198537 |
+
},
|
| 198538 |
+
{
|
| 198539 |
+
"epoch": 628.38,
|
| 198540 |
+
"learning_rate": 8.757350565428111e-06,
|
| 198541 |
+
"loss": 0.6636,
|
| 198542 |
+
"step": 77950
|
| 198543 |
+
},
|
| 198544 |
+
{
|
| 198545 |
+
"epoch": 628.42,
|
| 198546 |
+
"learning_rate": 8.757269789983845e-06,
|
| 198547 |
+
"loss": 1.106,
|
| 198548 |
+
"step": 77955
|
| 198549 |
+
},
|
| 198550 |
+
{
|
| 198551 |
+
"epoch": 628.46,
|
| 198552 |
+
"learning_rate": 8.757189014539581e-06,
|
| 198553 |
+
"loss": 0.2949,
|
| 198554 |
+
"step": 77960
|
| 198555 |
+
},
|
| 198556 |
+
{
|
| 198557 |
+
"epoch": 628.5,
|
| 198558 |
+
"learning_rate": 8.757108239095315e-06,
|
| 198559 |
+
"loss": 0.2952,
|
| 198560 |
+
"step": 77965
|
| 198561 |
+
},
|
| 198562 |
+
{
|
| 198563 |
+
"epoch": 628.54,
|
| 198564 |
+
"learning_rate": 8.757027463651051e-06,
|
| 198565 |
+
"loss": 0.3319,
|
| 198566 |
+
"step": 77970
|
| 198567 |
+
},
|
| 198568 |
+
{
|
| 198569 |
+
"epoch": 628.58,
|
| 198570 |
+
"learning_rate": 8.756946688206785e-06,
|
| 198571 |
+
"loss": 0.5626,
|
| 198572 |
+
"step": 77975
|
| 198573 |
+
},
|
| 198574 |
+
{
|
| 198575 |
+
"epoch": 628.62,
|
| 198576 |
+
"learning_rate": 8.756865912762521e-06,
|
| 198577 |
+
"loss": 1.0923,
|
| 198578 |
+
"step": 77980
|
| 198579 |
+
},
|
| 198580 |
+
{
|
| 198581 |
+
"epoch": 628.66,
|
| 198582 |
+
"learning_rate": 8.756785137318255e-06,
|
| 198583 |
+
"loss": 0.3112,
|
| 198584 |
+
"step": 77985
|
| 198585 |
+
},
|
| 198586 |
+
{
|
| 198587 |
+
"epoch": 628.7,
|
| 198588 |
+
"learning_rate": 8.756704361873991e-06,
|
| 198589 |
+
"loss": 0.33,
|
| 198590 |
+
"step": 77990
|
| 198591 |
+
},
|
| 198592 |
+
{
|
| 198593 |
+
"epoch": 628.74,
|
| 198594 |
+
"learning_rate": 8.756623586429725e-06,
|
| 198595 |
+
"loss": 0.3659,
|
| 198596 |
+
"step": 77995
|
| 198597 |
+
},
|
| 198598 |
+
{
|
| 198599 |
+
"epoch": 628.78,
|
| 198600 |
+
"learning_rate": 8.756542810985461e-06,
|
| 198601 |
+
"loss": 0.721,
|
| 198602 |
+
"step": 78000
|
| 198603 |
+
},
|
| 198604 |
+
{
|
| 198605 |
+
"epoch": 628.82,
|
| 198606 |
+
"learning_rate": 8.756462035541197e-06,
|
| 198607 |
+
"loss": 1.0558,
|
| 198608 |
+
"step": 78005
|
| 198609 |
+
},
|
| 198610 |
+
{
|
| 198611 |
+
"epoch": 628.86,
|
| 198612 |
+
"learning_rate": 8.756381260096931e-06,
|
| 198613 |
+
"loss": 0.3329,
|
| 198614 |
+
"step": 78010
|
| 198615 |
+
},
|
| 198616 |
+
{
|
| 198617 |
+
"epoch": 628.9,
|
| 198618 |
+
"learning_rate": 8.756300484652667e-06,
|
| 198619 |
+
"loss": 0.3351,
|
| 198620 |
+
"step": 78015
|
| 198621 |
+
},
|
| 198622 |
+
{
|
| 198623 |
+
"epoch": 628.94,
|
| 198624 |
+
"learning_rate": 8.756219709208401e-06,
|
| 198625 |
+
"loss": 0.3965,
|
| 198626 |
+
"step": 78020
|
| 198627 |
+
},
|
| 198628 |
+
{
|
| 198629 |
+
"epoch": 628.98,
|
| 198630 |
+
"learning_rate": 8.756138933764137e-06,
|
| 198631 |
+
"loss": 0.9025,
|
| 198632 |
+
"step": 78025
|
| 198633 |
+
},
|
| 198634 |
+
{
|
| 198635 |
+
"epoch": 629.0,
|
| 198636 |
+
"eval_loss": 0.38793015480041504,
|
| 198637 |
+
"eval_runtime": 42.1659,
|
| 198638 |
+
"eval_samples_per_second": 19.826,
|
| 198639 |
+
"eval_steps_per_second": 0.64,
|
| 198640 |
+
"eval_wer": 0.18838211261328183,
|
| 198641 |
+
"step": 78027
|
| 198642 |
+
},
|
| 198643 |
+
{
|
| 198644 |
+
"epoch": 624.02,
|
| 198645 |
+
"learning_rate": 8.756058158319871e-06,
|
| 198646 |
+
"loss": 0.4614,
|
| 198647 |
+
"step": 78030
|
| 198648 |
+
},
|
| 198649 |
+
{
|
| 198650 |
+
"epoch": 624.06,
|
| 198651 |
+
"learning_rate": 8.755977382875607e-06,
|
| 198652 |
+
"loss": 0.2992,
|
| 198653 |
+
"step": 78035
|
| 198654 |
+
},
|
| 198655 |
+
{
|
| 198656 |
+
"epoch": 624.1,
|
| 198657 |
+
"learning_rate": 8.755896607431341e-06,
|
| 198658 |
+
"loss": 0.3325,
|
| 198659 |
+
"step": 78040
|
| 198660 |
+
},
|
| 198661 |
+
{
|
| 198662 |
+
"epoch": 624.14,
|
| 198663 |
+
"learning_rate": 8.755815831987077e-06,
|
| 198664 |
+
"loss": 0.3556,
|
| 198665 |
+
"step": 78045
|
| 198666 |
+
},
|
| 198667 |
+
{
|
| 198668 |
+
"epoch": 624.18,
|
| 198669 |
+
"learning_rate": 8.755735056542811e-06,
|
| 198670 |
+
"loss": 0.7934,
|
| 198671 |
+
"step": 78050
|
| 198672 |
+
},
|
| 198673 |
+
{
|
| 198674 |
+
"epoch": 624.22,
|
| 198675 |
+
"learning_rate": 8.755654281098547e-06,
|
| 198676 |
+
"loss": 0.9644,
|
| 198677 |
+
"step": 78055
|
| 198678 |
+
},
|
| 198679 |
+
{
|
| 198680 |
+
"epoch": 624.26,
|
| 198681 |
+
"learning_rate": 8.755573505654283e-06,
|
| 198682 |
+
"loss": 0.3234,
|
| 198683 |
+
"step": 78060
|
| 198684 |
+
},
|
| 198685 |
+
{
|
| 198686 |
+
"epoch": 624.3,
|
| 198687 |
+
"learning_rate": 8.755492730210017e-06,
|
| 198688 |
+
"loss": 0.3082,
|
| 198689 |
+
"step": 78065
|
| 198690 |
+
},
|
| 198691 |
+
{
|
| 198692 |
+
"epoch": 624.34,
|
| 198693 |
+
"learning_rate": 8.755411954765753e-06,
|
| 198694 |
+
"loss": 0.3845,
|
| 198695 |
+
"step": 78070
|
| 198696 |
+
},
|
| 198697 |
+
{
|
| 198698 |
+
"epoch": 624.38,
|
| 198699 |
+
"learning_rate": 8.755331179321487e-06,
|
| 198700 |
+
"loss": 0.79,
|
| 198701 |
+
"step": 78075
|
| 198702 |
+
},
|
| 198703 |
+
{
|
| 198704 |
+
"epoch": 624.42,
|
| 198705 |
+
"learning_rate": 8.755250403877223e-06,
|
| 198706 |
+
"loss": 0.9148,
|
| 198707 |
+
"step": 78080
|
| 198708 |
+
},
|
| 198709 |
+
{
|
| 198710 |
+
"epoch": 624.46,
|
| 198711 |
+
"learning_rate": 8.755169628432957e-06,
|
| 198712 |
+
"loss": 0.3099,
|
| 198713 |
+
"step": 78085
|
| 198714 |
+
},
|
| 198715 |
+
{
|
| 198716 |
+
"epoch": 624.5,
|
| 198717 |
+
"learning_rate": 8.755088852988692e-06,
|
| 198718 |
+
"loss": 0.3206,
|
| 198719 |
+
"step": 78090
|
| 198720 |
+
},
|
| 198721 |
+
{
|
| 198722 |
+
"epoch": 624.54,
|
| 198723 |
+
"learning_rate": 8.755008077544427e-06,
|
| 198724 |
+
"loss": 0.3842,
|
| 198725 |
+
"step": 78095
|
| 198726 |
+
},
|
| 198727 |
+
{
|
| 198728 |
+
"epoch": 624.58,
|
| 198729 |
+
"learning_rate": 8.754927302100162e-06,
|
| 198730 |
+
"loss": 0.8316,
|
| 198731 |
+
"step": 78100
|
| 198732 |
+
},
|
| 198733 |
+
{
|
| 198734 |
+
"epoch": 624.62,
|
| 198735 |
+
"learning_rate": 8.754846526655897e-06,
|
| 198736 |
+
"loss": 0.7797,
|
| 198737 |
+
"step": 78105
|
| 198738 |
+
},
|
| 198739 |
+
{
|
| 198740 |
+
"epoch": 624.66,
|
| 198741 |
+
"learning_rate": 8.754765751211632e-06,
|
| 198742 |
+
"loss": 0.3066,
|
| 198743 |
+
"step": 78110
|
| 198744 |
+
},
|
| 198745 |
+
{
|
| 198746 |
+
"epoch": 624.7,
|
| 198747 |
+
"learning_rate": 8.754684975767368e-06,
|
| 198748 |
+
"loss": 0.2907,
|
| 198749 |
+
"step": 78115
|
| 198750 |
+
},
|
| 198751 |
+
{
|
| 198752 |
+
"epoch": 624.74,
|
| 198753 |
+
"learning_rate": 8.754604200323102e-06,
|
| 198754 |
+
"loss": 0.3628,
|
| 198755 |
+
"step": 78120
|
| 198756 |
+
},
|
| 198757 |
+
{
|
| 198758 |
+
"epoch": 624.78,
|
| 198759 |
+
"learning_rate": 8.754523424878838e-06,
|
| 198760 |
+
"loss": 0.8471,
|
| 198761 |
+
"step": 78125
|
| 198762 |
+
},
|
| 198763 |
+
{
|
| 198764 |
+
"epoch": 624.82,
|
| 198765 |
+
"learning_rate": 8.754442649434572e-06,
|
| 198766 |
+
"loss": 0.9309,
|
| 198767 |
+
"step": 78130
|
| 198768 |
+
},
|
| 198769 |
+
{
|
| 198770 |
+
"epoch": 624.86,
|
| 198771 |
+
"learning_rate": 8.754361873990308e-06,
|
| 198772 |
+
"loss": 0.354,
|
| 198773 |
+
"step": 78135
|
| 198774 |
+
},
|
| 198775 |
+
{
|
| 198776 |
+
"epoch": 624.9,
|
| 198777 |
+
"learning_rate": 8.754281098546042e-06,
|
| 198778 |
+
"loss": 0.328,
|
| 198779 |
+
"step": 78140
|
| 198780 |
+
},
|
| 198781 |
+
{
|
| 198782 |
+
"epoch": 624.94,
|
| 198783 |
+
"learning_rate": 8.754200323101778e-06,
|
| 198784 |
+
"loss": 0.3486,
|
| 198785 |
+
"step": 78145
|
| 198786 |
+
},
|
| 198787 |
+
{
|
| 198788 |
+
"epoch": 624.98,
|
| 198789 |
+
"learning_rate": 8.754119547657512e-06,
|
| 198790 |
+
"loss": 0.7785,
|
| 198791 |
+
"step": 78150
|
| 198792 |
+
},
|
| 198793 |
+
{
|
| 198794 |
+
"epoch": 625.0,
|
| 198795 |
+
"eval_loss": 0.40157103538513184,
|
| 198796 |
+
"eval_runtime": 40.7591,
|
| 198797 |
+
"eval_samples_per_second": 20.511,
|
| 198798 |
+
"eval_steps_per_second": 0.662,
|
| 198799 |
+
"eval_wer": 0.18420859346309218,
|
| 198800 |
+
"step": 78152
|
| 198801 |
+
},
|
| 198802 |
+
{
|
| 198803 |
+
"epoch": 630.02,
|
| 198804 |
+
"learning_rate": 8.754038772213248e-06,
|
| 198805 |
+
"loss": 0.3666,
|
| 198806 |
+
"step": 78155
|
| 198807 |
+
},
|
| 198808 |
+
{
|
| 198809 |
+
"epoch": 630.06,
|
| 198810 |
+
"learning_rate": 8.753957996768982e-06,
|
| 198811 |
+
"loss": 0.2648,
|
| 198812 |
+
"step": 78160
|
| 198813 |
+
},
|
| 198814 |
+
{
|
| 198815 |
+
"epoch": 630.1,
|
| 198816 |
+
"learning_rate": 8.753877221324718e-06,
|
| 198817 |
+
"loss": 0.3182,
|
| 198818 |
+
"step": 78165
|
| 198819 |
+
},
|
| 198820 |
+
{
|
| 198821 |
+
"epoch": 630.14,
|
| 198822 |
+
"learning_rate": 8.753796445880452e-06,
|
| 198823 |
+
"loss": 0.4187,
|
| 198824 |
+
"step": 78170
|
| 198825 |
+
},
|
| 198826 |
+
{
|
| 198827 |
+
"epoch": 630.18,
|
| 198828 |
+
"learning_rate": 8.753715670436188e-06,
|
| 198829 |
+
"loss": 0.7981,
|
| 198830 |
+
"step": 78175
|
| 198831 |
+
},
|
| 198832 |
+
{
|
| 198833 |
+
"epoch": 630.22,
|
| 198834 |
+
"learning_rate": 8.753634894991924e-06,
|
| 198835 |
+
"loss": 0.941,
|
| 198836 |
+
"step": 78180
|
| 198837 |
+
},
|
| 198838 |
+
{
|
| 198839 |
+
"epoch": 630.27,
|
| 198840 |
+
"learning_rate": 8.753554119547658e-06,
|
| 198841 |
+
"loss": 0.2765,
|
| 198842 |
+
"step": 78185
|
| 198843 |
+
},
|
| 198844 |
+
{
|
| 198845 |
+
"epoch": 630.31,
|
| 198846 |
+
"learning_rate": 8.753473344103394e-06,
|
| 198847 |
+
"loss": 0.3282,
|
| 198848 |
+
"step": 78190
|
| 198849 |
+
},
|
| 198850 |
+
{
|
| 198851 |
+
"epoch": 630.35,
|
| 198852 |
+
"learning_rate": 8.753392568659128e-06,
|
| 198853 |
+
"loss": 0.3855,
|
| 198854 |
+
"step": 78195
|
| 198855 |
+
},
|
| 198856 |
+
{
|
| 198857 |
+
"epoch": 630.39,
|
| 198858 |
+
"learning_rate": 8.753311793214864e-06,
|
| 198859 |
+
"loss": 0.846,
|
| 198860 |
+
"step": 78200
|
| 198861 |
+
},
|
| 198862 |
+
{
|
| 198863 |
+
"epoch": 630.43,
|
| 198864 |
+
"learning_rate": 8.753247172859451e-06,
|
| 198865 |
+
"loss": 0.9732,
|
| 198866 |
+
"step": 78205
|
| 198867 |
+
},
|
| 198868 |
+
{
|
| 198869 |
+
"epoch": 630.47,
|
| 198870 |
+
"learning_rate": 8.753166397415187e-06,
|
| 198871 |
+
"loss": 0.2954,
|
| 198872 |
+
"step": 78210
|
| 198873 |
+
},
|
| 198874 |
+
{
|
| 198875 |
+
"epoch": 630.51,
|
| 198876 |
+
"learning_rate": 8.753085621970921e-06,
|
| 198877 |
+
"loss": 0.3381,
|
| 198878 |
+
"step": 78215
|
| 198879 |
+
},
|
| 198880 |
+
{
|
| 198881 |
+
"epoch": 630.55,
|
| 198882 |
+
"learning_rate": 8.753004846526657e-06,
|
| 198883 |
+
"loss": 0.3762,
|
| 198884 |
+
"step": 78220
|
| 198885 |
+
},
|
| 198886 |
+
{
|
| 198887 |
+
"epoch": 630.59,
|
| 198888 |
+
"learning_rate": 8.752924071082391e-06,
|
| 198889 |
+
"loss": 0.6709,
|
| 198890 |
+
"step": 78225
|
| 198891 |
+
},
|
| 198892 |
+
{
|
| 198893 |
+
"epoch": 630.63,
|
| 198894 |
+
"learning_rate": 8.752843295638127e-06,
|
| 198895 |
+
"loss": 0.8799,
|
| 198896 |
+
"step": 78230
|
| 198897 |
+
},
|
| 198898 |
+
{
|
| 198899 |
+
"epoch": 630.67,
|
| 198900 |
+
"learning_rate": 8.752762520193861e-06,
|
| 198901 |
+
"loss": 0.3267,
|
| 198902 |
+
"step": 78235
|
| 198903 |
+
},
|
| 198904 |
+
{
|
| 198905 |
+
"epoch": 630.71,
|
| 198906 |
+
"learning_rate": 8.752681744749597e-06,
|
| 198907 |
+
"loss": 0.2767,
|
| 198908 |
+
"step": 78240
|
| 198909 |
+
},
|
| 198910 |
+
{
|
| 198911 |
+
"epoch": 630.75,
|
| 198912 |
+
"learning_rate": 8.752600969305331e-06,
|
| 198913 |
+
"loss": 0.4106,
|
| 198914 |
+
"step": 78245
|
| 198915 |
+
},
|
| 198916 |
+
{
|
| 198917 |
+
"epoch": 630.79,
|
| 198918 |
+
"learning_rate": 8.752520193861067e-06,
|
| 198919 |
+
"loss": 0.8182,
|
| 198920 |
+
"step": 78250
|
| 198921 |
+
},
|
| 198922 |
+
{
|
| 198923 |
+
"epoch": 630.83,
|
| 198924 |
+
"learning_rate": 8.752439418416801e-06,
|
| 198925 |
+
"loss": 0.9572,
|
| 198926 |
+
"step": 78255
|
| 198927 |
+
},
|
| 198928 |
+
{
|
| 198929 |
+
"epoch": 630.87,
|
| 198930 |
+
"learning_rate": 8.752358642972537e-06,
|
| 198931 |
+
"loss": 0.2726,
|
| 198932 |
+
"step": 78260
|
| 198933 |
+
},
|
| 198934 |
+
{
|
| 198935 |
+
"epoch": 630.91,
|
| 198936 |
+
"learning_rate": 8.752277867528271e-06,
|
| 198937 |
+
"loss": 0.3419,
|
| 198938 |
+
"step": 78265
|
| 198939 |
+
},
|
| 198940 |
+
{
|
| 198941 |
+
"epoch": 630.95,
|
| 198942 |
+
"learning_rate": 8.752197092084007e-06,
|
| 198943 |
+
"loss": 0.4871,
|
| 198944 |
+
"step": 78270
|
| 198945 |
+
},
|
| 198946 |
+
{
|
| 198947 |
+
"epoch": 630.99,
|
| 198948 |
+
"learning_rate": 8.752116316639743e-06,
|
| 198949 |
+
"loss": 0.8531,
|
| 198950 |
+
"step": 78275
|
| 198951 |
+
},
|
| 198952 |
+
{
|
| 198953 |
+
"epoch": 631.0,
|
| 198954 |
+
"eval_loss": 0.3751949369907379,
|
| 198955 |
+
"eval_runtime": 41.6866,
|
| 198956 |
+
"eval_samples_per_second": 20.054,
|
| 198957 |
+
"eval_steps_per_second": 0.648,
|
| 198958 |
+
"eval_wer": 0.18990001449065352,
|
| 198959 |
+
"step": 78276
|
| 198960 |
+
},
|
| 198961 |
+
{
|
| 198962 |
+
"epoch": 626.03,
|
| 198963 |
+
"learning_rate": 8.752035541195477e-06,
|
| 198964 |
+
"loss": 0.3637,
|
| 198965 |
+
"step": 78280
|
| 198966 |
+
},
|
| 198967 |
+
{
|
| 198968 |
+
"epoch": 626.07,
|
| 198969 |
+
"learning_rate": 8.751954765751213e-06,
|
| 198970 |
+
"loss": 0.2422,
|
| 198971 |
+
"step": 78285
|
| 198972 |
+
},
|
| 198973 |
+
{
|
| 198974 |
+
"epoch": 626.11,
|
| 198975 |
+
"learning_rate": 8.751873990306947e-06,
|
| 198976 |
+
"loss": 0.382,
|
| 198977 |
+
"step": 78290
|
| 198978 |
+
},
|
| 198979 |
+
{
|
| 198980 |
+
"epoch": 626.15,
|
| 198981 |
+
"learning_rate": 8.751793214862683e-06,
|
| 198982 |
+
"loss": 0.5115,
|
| 198983 |
+
"step": 78295
|
| 198984 |
+
},
|
| 198985 |
+
{
|
| 198986 |
+
"epoch": 626.19,
|
| 198987 |
+
"learning_rate": 8.751712439418417e-06,
|
| 198988 |
+
"loss": 0.876,
|
| 198989 |
+
"step": 78300
|
| 198990 |
+
},
|
| 198991 |
+
{
|
| 198992 |
+
"epoch": 626.23,
|
| 198993 |
+
"learning_rate": 8.751631663974153e-06,
|
| 198994 |
+
"loss": 0.6659,
|
| 198995 |
+
"step": 78305
|
| 198996 |
+
},
|
| 198997 |
+
{
|
| 198998 |
+
"epoch": 626.27,
|
| 198999 |
+
"learning_rate": 8.751550888529887e-06,
|
| 199000 |
+
"loss": 0.2971,
|
| 199001 |
+
"step": 78310
|
| 199002 |
+
},
|
| 199003 |
+
{
|
| 199004 |
+
"epoch": 626.31,
|
| 199005 |
+
"learning_rate": 8.751470113085623e-06,
|
| 199006 |
+
"loss": 0.3347,
|
| 199007 |
+
"step": 78315
|
| 199008 |
+
},
|
| 199009 |
+
{
|
| 199010 |
+
"epoch": 626.35,
|
| 199011 |
+
"learning_rate": 8.751389337641357e-06,
|
| 199012 |
+
"loss": 0.3762,
|
| 199013 |
+
"step": 78320
|
| 199014 |
+
},
|
| 199015 |
+
{
|
| 199016 |
+
"epoch": 626.39,
|
| 199017 |
+
"learning_rate": 8.751308562197093e-06,
|
| 199018 |
+
"loss": 0.9989,
|
| 199019 |
+
"step": 78325
|
| 199020 |
+
},
|
| 199021 |
+
{
|
| 199022 |
+
"epoch": 626.43,
|
| 199023 |
+
"learning_rate": 8.751227786752828e-06,
|
| 199024 |
+
"loss": 0.6867,
|
| 199025 |
+
"step": 78330
|
| 199026 |
+
},
|
| 199027 |
+
{
|
| 199028 |
+
"epoch": 626.47,
|
| 199029 |
+
"learning_rate": 8.751147011308563e-06,
|
| 199030 |
+
"loss": 0.3082,
|
| 199031 |
+
"step": 78335
|
| 199032 |
+
},
|
| 199033 |
+
{
|
| 199034 |
+
"epoch": 626.51,
|
| 199035 |
+
"learning_rate": 8.751066235864298e-06,
|
| 199036 |
+
"loss": 0.3448,
|
| 199037 |
+
"step": 78340
|
| 199038 |
+
},
|
| 199039 |
+
{
|
| 199040 |
+
"epoch": 626.55,
|
| 199041 |
+
"learning_rate": 8.750985460420033e-06,
|
| 199042 |
+
"loss": 0.3916,
|
| 199043 |
+
"step": 78345
|
| 199044 |
+
},
|
| 199045 |
+
{
|
| 199046 |
+
"epoch": 626.59,
|
| 199047 |
+
"learning_rate": 8.750904684975768e-06,
|
| 199048 |
+
"loss": 1.0074,
|
| 199049 |
+
"step": 78350
|
| 199050 |
+
},
|
| 199051 |
+
{
|
| 199052 |
+
"epoch": 626.63,
|
| 199053 |
+
"learning_rate": 8.750823909531503e-06,
|
| 199054 |
+
"loss": 0.717,
|
| 199055 |
+
"step": 78355
|
| 199056 |
+
},
|
| 199057 |
+
{
|
| 199058 |
+
"epoch": 626.67,
|
| 199059 |
+
"learning_rate": 8.750743134087238e-06,
|
| 199060 |
+
"loss": 0.3013,
|
| 199061 |
+
"step": 78360
|
| 199062 |
+
},
|
| 199063 |
+
{
|
| 199064 |
+
"epoch": 626.71,
|
| 199065 |
+
"learning_rate": 8.750662358642973e-06,
|
| 199066 |
+
"loss": 0.3156,
|
| 199067 |
+
"step": 78365
|
| 199068 |
+
},
|
| 199069 |
+
{
|
| 199070 |
+
"epoch": 626.75,
|
| 199071 |
+
"learning_rate": 8.750581583198708e-06,
|
| 199072 |
+
"loss": 0.3617,
|
| 199073 |
+
"step": 78370
|
| 199074 |
+
},
|
| 199075 |
+
{
|
| 199076 |
+
"epoch": 626.79,
|
| 199077 |
+
"learning_rate": 8.750500807754442e-06,
|
| 199078 |
+
"loss": 0.8905,
|
| 199079 |
+
"step": 78375
|
| 199080 |
+
},
|
| 199081 |
+
{
|
| 199082 |
+
"epoch": 626.83,
|
| 199083 |
+
"learning_rate": 8.750420032310178e-06,
|
| 199084 |
+
"loss": 0.7685,
|
| 199085 |
+
"step": 78380
|
| 199086 |
+
},
|
| 199087 |
+
{
|
| 199088 |
+
"epoch": 626.87,
|
| 199089 |
+
"learning_rate": 8.750339256865914e-06,
|
| 199090 |
+
"loss": 0.372,
|
| 199091 |
+
"step": 78385
|
| 199092 |
+
},
|
| 199093 |
+
{
|
| 199094 |
+
"epoch": 626.91,
|
| 199095 |
+
"learning_rate": 8.750258481421648e-06,
|
| 199096 |
+
"loss": 0.3526,
|
| 199097 |
+
"step": 78390
|
| 199098 |
+
},
|
| 199099 |
+
{
|
| 199100 |
+
"epoch": 626.95,
|
| 199101 |
+
"learning_rate": 8.750177705977384e-06,
|
| 199102 |
+
"loss": 0.3743,
|
| 199103 |
+
"step": 78395
|
| 199104 |
+
},
|
| 199105 |
+
{
|
| 199106 |
+
"epoch": 626.99,
|
| 199107 |
+
"learning_rate": 8.750096930533118e-06,
|
| 199108 |
+
"loss": 0.7968,
|
| 199109 |
+
"step": 78400
|
| 199110 |
+
},
|
| 199111 |
+
{
|
| 199112 |
+
"epoch": 627.0,
|
| 199113 |
+
"eval_loss": 0.37982842326164246,
|
| 199114 |
+
"eval_runtime": 41.5116,
|
| 199115 |
+
"eval_samples_per_second": 20.139,
|
| 199116 |
+
"eval_steps_per_second": 0.65,
|
| 199117 |
+
"eval_wer": 0.19250363901018921,
|
| 199118 |
+
"step": 78401
|
| 199119 |
}
|
| 199120 |
],
|
| 199121 |
+
"max_steps": 625000,
|
| 199122 |
"num_train_epochs": 5000,
|
| 199123 |
+
"total_flos": 2.2063508310558517e+20,
|
| 199124 |
"trial_name": null,
|
| 199125 |
"trial_params": null
|
| 199126 |
}
|
model-bin/finetune/base/{checkpoint-77779 β checkpoint-78401}/training_args.bin
RENAMED
|
File without changes
|
model-bin/finetune/base/log/1629894485.228881/events.out.tfevents.1629894485.7e498afd5545.7645.25
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a17fca44a6d71defd159febb47b0f102a559a33776e1ae9ca9ab1f4d7c6e040f
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1629894958.0897994/events.out.tfevents.1629894958.7e498afd5545.7645.27
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18c1fd66eb0d71586ea38d19e5eb7bee8a8e88b67b15bb3642c42b2dde1a12e3
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1629895430.286524/events.out.tfevents.1629895430.7e498afd5545.7645.29
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc32ae51510a08609a12aafa1c1ffe08d4d0a3fb8fff605bb044ee61d1a42471
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1629895902.062021/events.out.tfevents.1629895902.7e498afd5545.7645.31
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:320535633b023c1bcec395df9a9dcfe9fc75350edd0b2ce168649b1b7d7dd807
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1629896362.1823292/events.out.tfevents.1629896362.7e498afd5545.7645.33
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2017162fee5d86a229d1ee0ae224f38cba114d7bd31a0211380887367c54f69
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/events.out.tfevents.1629894485.7e498afd5545.7645.24
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5bacfe50390b7242d5bf6f65f93ea3c25bf063ca5070382a28495ad3adba35c
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1629894957.7e498afd5545.7645.26
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57567a7cd4c0b02665f029a22c08365713be272ad2f844c52552ee68de946a6b
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1629895430.7e498afd5545.7645.28
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35c9bd52bfc64a0115e82a34a1256c2f93466689e84b45cb791fbd31ab9a04ff
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1629895902.7e498afd5545.7645.30
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46fd0195b6c518a15311cb29c5d1faf9290021977eea615f33286f92fb713887
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1629896362.7e498afd5545.7645.32
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b62f9e66706b8ff65adf2f2bcb7d599d1a334e80e606548ebcf38df5567f983
|
| 3 |
+
size 8622
|