"auto-commit"
Browse files- model-bin/finetune/base/{checkpoint-143240 β checkpoint-144483}/config.json +0 -0
- model-bin/finetune/base/{checkpoint-143240 β checkpoint-144483}/optimizer.pt +1 -1
- model-bin/finetune/base/{checkpoint-143240 β checkpoint-144483}/preprocessor_config.json +0 -0
- model-bin/finetune/base/{checkpoint-143987 β checkpoint-144483}/pytorch_model.bin +1 -1
- model-bin/finetune/base/{checkpoint-143987 β checkpoint-144483}/rng_state.pth +1 -1
- model-bin/finetune/base/{checkpoint-143987 β checkpoint-144483}/scaler.pt +1 -1
- model-bin/finetune/base/{checkpoint-143240 β checkpoint-144483}/scheduler.pt +1 -1
- model-bin/finetune/base/{checkpoint-143987 β checkpoint-144483}/trainer_state.json +634 -4
- model-bin/finetune/base/{checkpoint-143240 β checkpoint-144483}/training_args.bin +0 -0
- model-bin/finetune/base/{checkpoint-143987 β checkpoint-144607}/config.json +0 -0
- model-bin/finetune/base/{checkpoint-143987 β checkpoint-144607}/optimizer.pt +1 -1
- model-bin/finetune/base/{checkpoint-143987 β checkpoint-144607}/preprocessor_config.json +0 -0
- model-bin/finetune/base/{checkpoint-143240 β checkpoint-144607}/pytorch_model.bin +1 -1
- model-bin/finetune/base/{checkpoint-143240 β checkpoint-144607}/rng_state.pth +1 -1
- model-bin/finetune/base/{checkpoint-143240 β checkpoint-144607}/scaler.pt +1 -1
- model-bin/finetune/base/{checkpoint-143987 β checkpoint-144607}/scheduler.pt +1 -1
- model-bin/finetune/base/{checkpoint-143240 β checkpoint-144607}/trainer_state.json +1743 -6
- model-bin/finetune/base/{checkpoint-143987 β checkpoint-144607}/training_args.bin +0 -0
- model-bin/finetune/base/log/1630221559.36492/events.out.tfevents.1630221559.cc93b136ebf5.1086.79 +3 -0
- model-bin/finetune/base/log/1630221982.8780048/events.out.tfevents.1630221982.cc93b136ebf5.1086.81 +3 -0
- model-bin/finetune/base/log/1630222527.1516037/events.out.tfevents.1630222527.cc93b136ebf5.1086.83 +3 -0
- model-bin/finetune/base/log/1630222956.4361434/events.out.tfevents.1630222956.cc93b136ebf5.1086.85 +3 -0
- model-bin/finetune/base/log/1630223389.0244238/events.out.tfevents.1630223389.cc93b136ebf5.1086.87 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1630221559.cc93b136ebf5.1086.78 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1630221982.cc93b136ebf5.1086.80 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1630222527.cc93b136ebf5.1086.82 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1630222956.cc93b136ebf5.1086.84 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1630223389.cc93b136ebf5.1086.86 +3 -0
model-bin/finetune/base/{checkpoint-143240 β checkpoint-144483}/config.json
RENAMED
|
File without changes
|
model-bin/finetune/base/{checkpoint-143240 β checkpoint-144483}/optimizer.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 722165393
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94daeb11a39388620968ea40ede80990c58e2194b68a663e870a56824841d042
|
| 3 |
size 722165393
|
model-bin/finetune/base/{checkpoint-143240 β checkpoint-144483}/preprocessor_config.json
RENAMED
|
File without changes
|
model-bin/finetune/base/{checkpoint-143987 β checkpoint-144483}/pytorch_model.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 377909911
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8aa047cbac843388af14ffd5944877cd0c9add5b011f99183ea0c3d4cbe721f
|
| 3 |
size 377909911
|
model-bin/finetune/base/{checkpoint-143987 β checkpoint-144483}/rng_state.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f118fdd8880a8d151524266d2280960068c739daf1033c5f76756553958f31e
|
| 3 |
size 14503
|
model-bin/finetune/base/{checkpoint-143987 β checkpoint-144483}/scaler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e92b0700bb181d8ec04bb405e4281e6135a07379cd92ad2d3e2e3a7f7d5ff4d
|
| 3 |
size 559
|
model-bin/finetune/base/{checkpoint-143240 β checkpoint-144483}/scheduler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:635336ac426e018d173b5d0df130c24ba1f3e4dc6bc13aea0c41168687786e27
|
| 3 |
size 623
|
model-bin/finetune/base/{checkpoint-143987 β checkpoint-144483}/trainer_state.json
RENAMED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.16819527695529718,
|
| 3 |
"best_model_checkpoint": "./model-bin/finetune/base/checkpoint-143240",
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -282561,11 +282561,641 @@
|
|
| 282561 |
"eval_steps_per_second": 0.666,
|
| 282562 |
"eval_wer": 0.1780933362916944,
|
| 282563 |
"step": 143987
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282564 |
}
|
| 282565 |
],
|
| 282566 |
-
"max_steps":
|
| 282567 |
"num_train_epochs": 5000,
|
| 282568 |
-
"total_flos": 4.
|
| 282569 |
"trial_name": null,
|
| 282570 |
"trial_params": null
|
| 282571 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.16819527695529718,
|
| 3 |
"best_model_checkpoint": "./model-bin/finetune/base/checkpoint-143240",
|
| 4 |
+
"epoch": 1164.995983935743,
|
| 5 |
+
"global_step": 144483,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 282561 |
"eval_steps_per_second": 0.666,
|
| 282562 |
"eval_wer": 0.1780933362916944,
|
| 282563 |
"step": 143987
|
| 282564 |
+
},
|
| 282565 |
+
{
|
| 282566 |
+
"epoch": 1161.02,
|
| 282567 |
+
"learning_rate": 7.690904684975768e-06,
|
| 282568 |
+
"loss": 0.371,
|
| 282569 |
+
"step": 143990
|
| 282570 |
+
},
|
| 282571 |
+
{
|
| 282572 |
+
"epoch": 1161.06,
|
| 282573 |
+
"learning_rate": 7.690823909531503e-06,
|
| 282574 |
+
"loss": 0.2585,
|
| 282575 |
+
"step": 143995
|
| 282576 |
+
},
|
| 282577 |
+
{
|
| 282578 |
+
"epoch": 1161.1,
|
| 282579 |
+
"learning_rate": 7.690743134087238e-06,
|
| 282580 |
+
"loss": 0.2695,
|
| 282581 |
+
"step": 144000
|
| 282582 |
+
},
|
| 282583 |
+
{
|
| 282584 |
+
"epoch": 1161.14,
|
| 282585 |
+
"learning_rate": 7.690662358642972e-06,
|
| 282586 |
+
"loss": 0.3722,
|
| 282587 |
+
"step": 144005
|
| 282588 |
+
},
|
| 282589 |
+
{
|
| 282590 |
+
"epoch": 1161.18,
|
| 282591 |
+
"learning_rate": 7.690581583198708e-06,
|
| 282592 |
+
"loss": 0.6405,
|
| 282593 |
+
"step": 144010
|
| 282594 |
+
},
|
| 282595 |
+
{
|
| 282596 |
+
"epoch": 1161.22,
|
| 282597 |
+
"learning_rate": 7.690500807754442e-06,
|
| 282598 |
+
"loss": 0.7858,
|
| 282599 |
+
"step": 144015
|
| 282600 |
+
},
|
| 282601 |
+
{
|
| 282602 |
+
"epoch": 1161.27,
|
| 282603 |
+
"learning_rate": 7.690420032310178e-06,
|
| 282604 |
+
"loss": 0.2549,
|
| 282605 |
+
"step": 144020
|
| 282606 |
+
},
|
| 282607 |
+
{
|
| 282608 |
+
"epoch": 1161.31,
|
| 282609 |
+
"learning_rate": 7.690339256865912e-06,
|
| 282610 |
+
"loss": 0.2691,
|
| 282611 |
+
"step": 144025
|
| 282612 |
+
},
|
| 282613 |
+
{
|
| 282614 |
+
"epoch": 1161.35,
|
| 282615 |
+
"learning_rate": 7.690258481421648e-06,
|
| 282616 |
+
"loss": 0.3902,
|
| 282617 |
+
"step": 144030
|
| 282618 |
+
},
|
| 282619 |
+
{
|
| 282620 |
+
"epoch": 1161.39,
|
| 282621 |
+
"learning_rate": 7.690177705977382e-06,
|
| 282622 |
+
"loss": 0.7926,
|
| 282623 |
+
"step": 144035
|
| 282624 |
+
},
|
| 282625 |
+
{
|
| 282626 |
+
"epoch": 1161.43,
|
| 282627 |
+
"learning_rate": 7.690096930533118e-06,
|
| 282628 |
+
"loss": 0.8472,
|
| 282629 |
+
"step": 144040
|
| 282630 |
+
},
|
| 282631 |
+
{
|
| 282632 |
+
"epoch": 1161.47,
|
| 282633 |
+
"learning_rate": 7.690016155088854e-06,
|
| 282634 |
+
"loss": 0.2255,
|
| 282635 |
+
"step": 144045
|
| 282636 |
+
},
|
| 282637 |
+
{
|
| 282638 |
+
"epoch": 1161.51,
|
| 282639 |
+
"learning_rate": 7.689935379644588e-06,
|
| 282640 |
+
"loss": 0.2868,
|
| 282641 |
+
"step": 144050
|
| 282642 |
+
},
|
| 282643 |
+
{
|
| 282644 |
+
"epoch": 1161.55,
|
| 282645 |
+
"learning_rate": 7.689854604200324e-06,
|
| 282646 |
+
"loss": 0.3652,
|
| 282647 |
+
"step": 144055
|
| 282648 |
+
},
|
| 282649 |
+
{
|
| 282650 |
+
"epoch": 1161.59,
|
| 282651 |
+
"learning_rate": 7.689773828756058e-06,
|
| 282652 |
+
"loss": 0.7302,
|
| 282653 |
+
"step": 144060
|
| 282654 |
+
},
|
| 282655 |
+
{
|
| 282656 |
+
"epoch": 1161.63,
|
| 282657 |
+
"learning_rate": 7.689693053311794e-06,
|
| 282658 |
+
"loss": 0.8197,
|
| 282659 |
+
"step": 144065
|
| 282660 |
+
},
|
| 282661 |
+
{
|
| 282662 |
+
"epoch": 1161.67,
|
| 282663 |
+
"learning_rate": 7.689612277867528e-06,
|
| 282664 |
+
"loss": 0.3005,
|
| 282665 |
+
"step": 144070
|
| 282666 |
+
},
|
| 282667 |
+
{
|
| 282668 |
+
"epoch": 1161.71,
|
| 282669 |
+
"learning_rate": 7.689531502423264e-06,
|
| 282670 |
+
"loss": 0.3147,
|
| 282671 |
+
"step": 144075
|
| 282672 |
+
},
|
| 282673 |
+
{
|
| 282674 |
+
"epoch": 1161.75,
|
| 282675 |
+
"learning_rate": 7.689450726978998e-06,
|
| 282676 |
+
"loss": 0.3716,
|
| 282677 |
+
"step": 144080
|
| 282678 |
+
},
|
| 282679 |
+
{
|
| 282680 |
+
"epoch": 1161.79,
|
| 282681 |
+
"learning_rate": 7.689369951534734e-06,
|
| 282682 |
+
"loss": 0.6532,
|
| 282683 |
+
"step": 144085
|
| 282684 |
+
},
|
| 282685 |
+
{
|
| 282686 |
+
"epoch": 1161.83,
|
| 282687 |
+
"learning_rate": 7.689289176090468e-06,
|
| 282688 |
+
"loss": 0.7379,
|
| 282689 |
+
"step": 144090
|
| 282690 |
+
},
|
| 282691 |
+
{
|
| 282692 |
+
"epoch": 1161.87,
|
| 282693 |
+
"learning_rate": 7.689208400646204e-06,
|
| 282694 |
+
"loss": 0.3081,
|
| 282695 |
+
"step": 144095
|
| 282696 |
+
},
|
| 282697 |
+
{
|
| 282698 |
+
"epoch": 1161.91,
|
| 282699 |
+
"learning_rate": 7.68912762520194e-06,
|
| 282700 |
+
"loss": 0.3046,
|
| 282701 |
+
"step": 144100
|
| 282702 |
+
},
|
| 282703 |
+
{
|
| 282704 |
+
"epoch": 1161.95,
|
| 282705 |
+
"learning_rate": 7.689046849757674e-06,
|
| 282706 |
+
"loss": 0.4244,
|
| 282707 |
+
"step": 144105
|
| 282708 |
+
},
|
| 282709 |
+
{
|
| 282710 |
+
"epoch": 1161.99,
|
| 282711 |
+
"learning_rate": 7.68896607431341e-06,
|
| 282712 |
+
"loss": 0.8756,
|
| 282713 |
+
"step": 144110
|
| 282714 |
+
},
|
| 282715 |
+
{
|
| 282716 |
+
"epoch": 1162.0,
|
| 282717 |
+
"eval_loss": 0.3087240755558014,
|
| 282718 |
+
"eval_runtime": 40.87,
|
| 282719 |
+
"eval_samples_per_second": 20.504,
|
| 282720 |
+
"eval_steps_per_second": 0.661,
|
| 282721 |
+
"eval_wer": 0.17917888563049852,
|
| 282722 |
+
"step": 144111
|
| 282723 |
+
},
|
| 282724 |
+
{
|
| 282725 |
+
"epoch": 1162.03,
|
| 282726 |
+
"learning_rate": 7.688885298869144e-06,
|
| 282727 |
+
"loss": 0.288,
|
| 282728 |
+
"step": 144115
|
| 282729 |
+
},
|
| 282730 |
+
{
|
| 282731 |
+
"epoch": 1162.07,
|
| 282732 |
+
"learning_rate": 7.68880452342488e-06,
|
| 282733 |
+
"loss": 0.2308,
|
| 282734 |
+
"step": 144120
|
| 282735 |
+
},
|
| 282736 |
+
{
|
| 282737 |
+
"epoch": 1162.11,
|
| 282738 |
+
"learning_rate": 7.688723747980614e-06,
|
| 282739 |
+
"loss": 0.2779,
|
| 282740 |
+
"step": 144125
|
| 282741 |
+
},
|
| 282742 |
+
{
|
| 282743 |
+
"epoch": 1162.15,
|
| 282744 |
+
"learning_rate": 7.68864297253635e-06,
|
| 282745 |
+
"loss": 0.367,
|
| 282746 |
+
"step": 144130
|
| 282747 |
+
},
|
| 282748 |
+
{
|
| 282749 |
+
"epoch": 1162.19,
|
| 282750 |
+
"learning_rate": 7.688562197092084e-06,
|
| 282751 |
+
"loss": 0.8518,
|
| 282752 |
+
"step": 144135
|
| 282753 |
+
},
|
| 282754 |
+
{
|
| 282755 |
+
"epoch": 1162.23,
|
| 282756 |
+
"learning_rate": 7.68848142164782e-06,
|
| 282757 |
+
"loss": 0.5858,
|
| 282758 |
+
"step": 144140
|
| 282759 |
+
},
|
| 282760 |
+
{
|
| 282761 |
+
"epoch": 1162.27,
|
| 282762 |
+
"learning_rate": 7.688400646203554e-06,
|
| 282763 |
+
"loss": 0.2899,
|
| 282764 |
+
"step": 144145
|
| 282765 |
+
},
|
| 282766 |
+
{
|
| 282767 |
+
"epoch": 1162.31,
|
| 282768 |
+
"learning_rate": 7.68831987075929e-06,
|
| 282769 |
+
"loss": 0.2828,
|
| 282770 |
+
"step": 144150
|
| 282771 |
+
},
|
| 282772 |
+
{
|
| 282773 |
+
"epoch": 1162.35,
|
| 282774 |
+
"learning_rate": 7.688239095315024e-06,
|
| 282775 |
+
"loss": 0.3602,
|
| 282776 |
+
"step": 144155
|
| 282777 |
+
},
|
| 282778 |
+
{
|
| 282779 |
+
"epoch": 1162.4,
|
| 282780 |
+
"learning_rate": 7.68815831987076e-06,
|
| 282781 |
+
"loss": 0.8127,
|
| 282782 |
+
"step": 144160
|
| 282783 |
+
},
|
| 282784 |
+
{
|
| 282785 |
+
"epoch": 1162.44,
|
| 282786 |
+
"learning_rate": 7.688077544426495e-06,
|
| 282787 |
+
"loss": 0.6768,
|
| 282788 |
+
"step": 144165
|
| 282789 |
+
},
|
| 282790 |
+
{
|
| 282791 |
+
"epoch": 1162.48,
|
| 282792 |
+
"learning_rate": 7.68799676898223e-06,
|
| 282793 |
+
"loss": 0.2627,
|
| 282794 |
+
"step": 144170
|
| 282795 |
+
},
|
| 282796 |
+
{
|
| 282797 |
+
"epoch": 1162.52,
|
| 282798 |
+
"learning_rate": 7.687915993537965e-06,
|
| 282799 |
+
"loss": 0.2619,
|
| 282800 |
+
"step": 144175
|
| 282801 |
+
},
|
| 282802 |
+
{
|
| 282803 |
+
"epoch": 1162.56,
|
| 282804 |
+
"learning_rate": 7.6878352180937e-06,
|
| 282805 |
+
"loss": 0.3976,
|
| 282806 |
+
"step": 144180
|
| 282807 |
+
},
|
| 282808 |
+
{
|
| 282809 |
+
"epoch": 1162.6,
|
| 282810 |
+
"learning_rate": 7.687754442649435e-06,
|
| 282811 |
+
"loss": 0.8505,
|
| 282812 |
+
"step": 144185
|
| 282813 |
+
},
|
| 282814 |
+
{
|
| 282815 |
+
"epoch": 1162.64,
|
| 282816 |
+
"learning_rate": 7.68767366720517e-06,
|
| 282817 |
+
"loss": 0.6324,
|
| 282818 |
+
"step": 144190
|
| 282819 |
+
},
|
| 282820 |
+
{
|
| 282821 |
+
"epoch": 1162.68,
|
| 282822 |
+
"learning_rate": 7.687592891760905e-06,
|
| 282823 |
+
"loss": 0.2943,
|
| 282824 |
+
"step": 144195
|
| 282825 |
+
},
|
| 282826 |
+
{
|
| 282827 |
+
"epoch": 1162.72,
|
| 282828 |
+
"learning_rate": 7.68751211631664e-06,
|
| 282829 |
+
"loss": 0.4165,
|
| 282830 |
+
"step": 144200
|
| 282831 |
+
},
|
| 282832 |
+
{
|
| 282833 |
+
"epoch": 1162.76,
|
| 282834 |
+
"learning_rate": 7.687431340872375e-06,
|
| 282835 |
+
"loss": 0.4133,
|
| 282836 |
+
"step": 144205
|
| 282837 |
+
},
|
| 282838 |
+
{
|
| 282839 |
+
"epoch": 1162.8,
|
| 282840 |
+
"learning_rate": 7.68735056542811e-06,
|
| 282841 |
+
"loss": 0.8709,
|
| 282842 |
+
"step": 144210
|
| 282843 |
+
},
|
| 282844 |
+
{
|
| 282845 |
+
"epoch": 1162.84,
|
| 282846 |
+
"learning_rate": 7.687269789983845e-06,
|
| 282847 |
+
"loss": 0.5874,
|
| 282848 |
+
"step": 144215
|
| 282849 |
+
},
|
| 282850 |
+
{
|
| 282851 |
+
"epoch": 1162.88,
|
| 282852 |
+
"learning_rate": 7.687189014539581e-06,
|
| 282853 |
+
"loss": 0.2829,
|
| 282854 |
+
"step": 144220
|
| 282855 |
+
},
|
| 282856 |
+
{
|
| 282857 |
+
"epoch": 1162.92,
|
| 282858 |
+
"learning_rate": 7.687108239095315e-06,
|
| 282859 |
+
"loss": 0.2959,
|
| 282860 |
+
"step": 144225
|
| 282861 |
+
},
|
| 282862 |
+
{
|
| 282863 |
+
"epoch": 1162.96,
|
| 282864 |
+
"learning_rate": 7.687027463651051e-06,
|
| 282865 |
+
"loss": 0.4034,
|
| 282866 |
+
"step": 144230
|
| 282867 |
+
},
|
| 282868 |
+
{
|
| 282869 |
+
"epoch": 1163.0,
|
| 282870 |
+
"learning_rate": 7.686946688206785e-06,
|
| 282871 |
+
"loss": 1.1425,
|
| 282872 |
+
"step": 144235
|
| 282873 |
+
},
|
| 282874 |
+
{
|
| 282875 |
+
"epoch": 1163.0,
|
| 282876 |
+
"eval_loss": 0.3543190360069275,
|
| 282877 |
+
"eval_runtime": 42.4164,
|
| 282878 |
+
"eval_samples_per_second": 19.756,
|
| 282879 |
+
"eval_steps_per_second": 0.637,
|
| 282880 |
+
"eval_wer": 0.16862858380876725,
|
| 282881 |
+
"step": 144235
|
| 282882 |
+
},
|
| 282883 |
+
{
|
| 282884 |
+
"epoch": 1163.04,
|
| 282885 |
+
"learning_rate": 7.686865912762521e-06,
|
| 282886 |
+
"loss": 0.3994,
|
| 282887 |
+
"step": 144240
|
| 282888 |
+
},
|
| 282889 |
+
{
|
| 282890 |
+
"epoch": 1163.08,
|
| 282891 |
+
"learning_rate": 7.686785137318255e-06,
|
| 282892 |
+
"loss": 0.2606,
|
| 282893 |
+
"step": 144245
|
| 282894 |
+
},
|
| 282895 |
+
{
|
| 282896 |
+
"epoch": 1163.12,
|
| 282897 |
+
"learning_rate": 7.686704361873991e-06,
|
| 282898 |
+
"loss": 0.2929,
|
| 282899 |
+
"step": 144250
|
| 282900 |
+
},
|
| 282901 |
+
{
|
| 282902 |
+
"epoch": 1163.16,
|
| 282903 |
+
"learning_rate": 7.686623586429725e-06,
|
| 282904 |
+
"loss": 0.5009,
|
| 282905 |
+
"step": 144255
|
| 282906 |
+
},
|
| 282907 |
+
{
|
| 282908 |
+
"epoch": 1163.2,
|
| 282909 |
+
"learning_rate": 7.686542810985461e-06,
|
| 282910 |
+
"loss": 1.1961,
|
| 282911 |
+
"step": 144260
|
| 282912 |
+
},
|
| 282913 |
+
{
|
| 282914 |
+
"epoch": 1163.24,
|
| 282915 |
+
"learning_rate": 7.686462035541195e-06,
|
| 282916 |
+
"loss": 0.3198,
|
| 282917 |
+
"step": 144265
|
| 282918 |
+
},
|
| 282919 |
+
{
|
| 282920 |
+
"epoch": 1163.28,
|
| 282921 |
+
"learning_rate": 7.686381260096931e-06,
|
| 282922 |
+
"loss": 0.2407,
|
| 282923 |
+
"step": 144270
|
| 282924 |
+
},
|
| 282925 |
+
{
|
| 282926 |
+
"epoch": 1163.32,
|
| 282927 |
+
"learning_rate": 7.686300484652667e-06,
|
| 282928 |
+
"loss": 0.3547,
|
| 282929 |
+
"step": 144275
|
| 282930 |
+
},
|
| 282931 |
+
{
|
| 282932 |
+
"epoch": 1163.36,
|
| 282933 |
+
"learning_rate": 7.686219709208401e-06,
|
| 282934 |
+
"loss": 0.5029,
|
| 282935 |
+
"step": 144280
|
| 282936 |
+
},
|
| 282937 |
+
{
|
| 282938 |
+
"epoch": 1163.4,
|
| 282939 |
+
"learning_rate": 7.686138933764137e-06,
|
| 282940 |
+
"loss": 1.2116,
|
| 282941 |
+
"step": 144285
|
| 282942 |
+
},
|
| 282943 |
+
{
|
| 282944 |
+
"epoch": 1163.44,
|
| 282945 |
+
"learning_rate": 7.686058158319871e-06,
|
| 282946 |
+
"loss": 0.3207,
|
| 282947 |
+
"step": 144290
|
| 282948 |
+
},
|
| 282949 |
+
{
|
| 282950 |
+
"epoch": 1163.48,
|
| 282951 |
+
"learning_rate": 7.685977382875607e-06,
|
| 282952 |
+
"loss": 0.2658,
|
| 282953 |
+
"step": 144295
|
| 282954 |
+
},
|
| 282955 |
+
{
|
| 282956 |
+
"epoch": 1163.52,
|
| 282957 |
+
"learning_rate": 7.685896607431341e-06,
|
| 282958 |
+
"loss": 0.4059,
|
| 282959 |
+
"step": 144300
|
| 282960 |
+
},
|
| 282961 |
+
{
|
| 282962 |
+
"epoch": 1163.56,
|
| 282963 |
+
"learning_rate": 7.685815831987077e-06,
|
| 282964 |
+
"loss": 0.4165,
|
| 282965 |
+
"step": 144305
|
| 282966 |
+
},
|
| 282967 |
+
{
|
| 282968 |
+
"epoch": 1163.6,
|
| 282969 |
+
"learning_rate": 7.685735056542811e-06,
|
| 282970 |
+
"loss": 1.1251,
|
| 282971 |
+
"step": 144310
|
| 282972 |
+
},
|
| 282973 |
+
{
|
| 282974 |
+
"epoch": 1163.64,
|
| 282975 |
+
"learning_rate": 7.685654281098547e-06,
|
| 282976 |
+
"loss": 0.2963,
|
| 282977 |
+
"step": 144315
|
| 282978 |
+
},
|
| 282979 |
+
{
|
| 282980 |
+
"epoch": 1163.68,
|
| 282981 |
+
"learning_rate": 7.685573505654281e-06,
|
| 282982 |
+
"loss": 0.3359,
|
| 282983 |
+
"step": 144320
|
| 282984 |
+
},
|
| 282985 |
+
{
|
| 282986 |
+
"epoch": 1163.72,
|
| 282987 |
+
"learning_rate": 7.685492730210017e-06,
|
| 282988 |
+
"loss": 0.3135,
|
| 282989 |
+
"step": 144325
|
| 282990 |
+
},
|
| 282991 |
+
{
|
| 282992 |
+
"epoch": 1163.76,
|
| 282993 |
+
"learning_rate": 7.685411954765751e-06,
|
| 282994 |
+
"loss": 0.4269,
|
| 282995 |
+
"step": 144330
|
| 282996 |
+
},
|
| 282997 |
+
{
|
| 282998 |
+
"epoch": 1163.8,
|
| 282999 |
+
"learning_rate": 7.685331179321487e-06,
|
| 283000 |
+
"loss": 1.1198,
|
| 283001 |
+
"step": 144335
|
| 283002 |
+
},
|
| 283003 |
+
{
|
| 283004 |
+
"epoch": 1163.84,
|
| 283005 |
+
"learning_rate": 7.685250403877223e-06,
|
| 283006 |
+
"loss": 0.3009,
|
| 283007 |
+
"step": 144340
|
| 283008 |
+
},
|
| 283009 |
+
{
|
| 283010 |
+
"epoch": 1163.88,
|
| 283011 |
+
"learning_rate": 7.685169628432957e-06,
|
| 283012 |
+
"loss": 0.2363,
|
| 283013 |
+
"step": 144345
|
| 283014 |
+
},
|
| 283015 |
+
{
|
| 283016 |
+
"epoch": 1163.92,
|
| 283017 |
+
"learning_rate": 7.685088852988693e-06,
|
| 283018 |
+
"loss": 0.286,
|
| 283019 |
+
"step": 144350
|
| 283020 |
+
},
|
| 283021 |
+
{
|
| 283022 |
+
"epoch": 1163.96,
|
| 283023 |
+
"learning_rate": 7.685008077544427e-06,
|
| 283024 |
+
"loss": 0.6232,
|
| 283025 |
+
"step": 144355
|
| 283026 |
+
},
|
| 283027 |
+
{
|
| 283028 |
+
"epoch": 1164.0,
|
| 283029 |
+
"eval_loss": 0.46291494369506836,
|
| 283030 |
+
"eval_runtime": 40.1634,
|
| 283031 |
+
"eval_samples_per_second": 20.84,
|
| 283032 |
+
"eval_steps_per_second": 0.672,
|
| 283033 |
+
"eval_wer": 0.1854970934565509,
|
| 283034 |
+
"step": 144359
|
| 283035 |
+
},
|
| 283036 |
+
{
|
| 283037 |
+
"epoch": 1164.01,
|
| 283038 |
+
"learning_rate": 7.684927302100163e-06,
|
| 283039 |
+
"loss": 0.39,
|
| 283040 |
+
"step": 144360
|
| 283041 |
+
},
|
| 283042 |
+
{
|
| 283043 |
+
"epoch": 1164.05,
|
| 283044 |
+
"learning_rate": 7.684846526655897e-06,
|
| 283045 |
+
"loss": 0.2847,
|
| 283046 |
+
"step": 144365
|
| 283047 |
+
},
|
| 283048 |
+
{
|
| 283049 |
+
"epoch": 1164.09,
|
| 283050 |
+
"learning_rate": 7.684765751211633e-06,
|
| 283051 |
+
"loss": 0.294,
|
| 283052 |
+
"step": 144370
|
| 283053 |
+
},
|
| 283054 |
+
{
|
| 283055 |
+
"epoch": 1164.13,
|
| 283056 |
+
"learning_rate": 7.684684975767367e-06,
|
| 283057 |
+
"loss": 0.3235,
|
| 283058 |
+
"step": 144375
|
| 283059 |
+
},
|
| 283060 |
+
{
|
| 283061 |
+
"epoch": 1164.17,
|
| 283062 |
+
"learning_rate": 7.684604200323102e-06,
|
| 283063 |
+
"loss": 0.4361,
|
| 283064 |
+
"step": 144380
|
| 283065 |
+
},
|
| 283066 |
+
{
|
| 283067 |
+
"epoch": 1164.21,
|
| 283068 |
+
"learning_rate": 7.684523424878837e-06,
|
| 283069 |
+
"loss": 1.1416,
|
| 283070 |
+
"step": 144385
|
| 283071 |
+
},
|
| 283072 |
+
{
|
| 283073 |
+
"epoch": 1164.25,
|
| 283074 |
+
"learning_rate": 7.684442649434572e-06,
|
| 283075 |
+
"loss": 0.3318,
|
| 283076 |
+
"step": 144390
|
| 283077 |
+
},
|
| 283078 |
+
{
|
| 283079 |
+
"epoch": 1164.29,
|
| 283080 |
+
"learning_rate": 7.684361873990308e-06,
|
| 283081 |
+
"loss": 0.2674,
|
| 283082 |
+
"step": 144395
|
| 283083 |
+
},
|
| 283084 |
+
{
|
| 283085 |
+
"epoch": 1164.33,
|
| 283086 |
+
"learning_rate": 7.684281098546042e-06,
|
| 283087 |
+
"loss": 0.295,
|
| 283088 |
+
"step": 144400
|
| 283089 |
+
},
|
| 283090 |
+
{
|
| 283091 |
+
"epoch": 1164.37,
|
| 283092 |
+
"learning_rate": 7.684200323101778e-06,
|
| 283093 |
+
"loss": 0.4393,
|
| 283094 |
+
"step": 144405
|
| 283095 |
+
},
|
| 283096 |
+
{
|
| 283097 |
+
"epoch": 1164.41,
|
| 283098 |
+
"learning_rate": 7.684119547657512e-06,
|
| 283099 |
+
"loss": 1.0585,
|
| 283100 |
+
"step": 144410
|
| 283101 |
+
},
|
| 283102 |
+
{
|
| 283103 |
+
"epoch": 1164.45,
|
| 283104 |
+
"learning_rate": 7.684038772213248e-06,
|
| 283105 |
+
"loss": 0.312,
|
| 283106 |
+
"step": 144415
|
| 283107 |
+
},
|
| 283108 |
+
{
|
| 283109 |
+
"epoch": 1164.49,
|
| 283110 |
+
"learning_rate": 7.683957996768982e-06,
|
| 283111 |
+
"loss": 0.2595,
|
| 283112 |
+
"step": 144420
|
| 283113 |
+
},
|
| 283114 |
+
{
|
| 283115 |
+
"epoch": 1164.53,
|
| 283116 |
+
"learning_rate": 7.683877221324718e-06,
|
| 283117 |
+
"loss": 0.2933,
|
| 283118 |
+
"step": 144425
|
| 283119 |
+
},
|
| 283120 |
+
{
|
| 283121 |
+
"epoch": 1164.57,
|
| 283122 |
+
"learning_rate": 7.683796445880452e-06,
|
| 283123 |
+
"loss": 0.515,
|
| 283124 |
+
"step": 144430
|
| 283125 |
+
},
|
| 283126 |
+
{
|
| 283127 |
+
"epoch": 1164.61,
|
| 283128 |
+
"learning_rate": 7.683715670436188e-06,
|
| 283129 |
+
"loss": 0.9295,
|
| 283130 |
+
"step": 144435
|
| 283131 |
+
},
|
| 283132 |
+
{
|
| 283133 |
+
"epoch": 1164.65,
|
| 283134 |
+
"learning_rate": 7.683634894991922e-06,
|
| 283135 |
+
"loss": 0.2886,
|
| 283136 |
+
"step": 144440
|
| 283137 |
+
},
|
| 283138 |
+
{
|
| 283139 |
+
"epoch": 1164.69,
|
| 283140 |
+
"learning_rate": 7.683554119547658e-06,
|
| 283141 |
+
"loss": 0.3172,
|
| 283142 |
+
"step": 144445
|
| 283143 |
+
},
|
| 283144 |
+
{
|
| 283145 |
+
"epoch": 1164.73,
|
| 283146 |
+
"learning_rate": 7.683473344103394e-06,
|
| 283147 |
+
"loss": 0.354,
|
| 283148 |
+
"step": 144450
|
| 283149 |
+
},
|
| 283150 |
+
{
|
| 283151 |
+
"epoch": 1164.77,
|
| 283152 |
+
"learning_rate": 7.683392568659128e-06,
|
| 283153 |
+
"loss": 0.4889,
|
| 283154 |
+
"step": 144455
|
| 283155 |
+
},
|
| 283156 |
+
{
|
| 283157 |
+
"epoch": 1164.81,
|
| 283158 |
+
"learning_rate": 7.683311793214864e-06,
|
| 283159 |
+
"loss": 0.9191,
|
| 283160 |
+
"step": 144460
|
| 283161 |
+
},
|
| 283162 |
+
{
|
| 283163 |
+
"epoch": 1164.85,
|
| 283164 |
+
"learning_rate": 7.683231017770598e-06,
|
| 283165 |
+
"loss": 0.3006,
|
| 283166 |
+
"step": 144465
|
| 283167 |
+
},
|
| 283168 |
+
{
|
| 283169 |
+
"epoch": 1164.89,
|
| 283170 |
+
"learning_rate": 7.683150242326334e-06,
|
| 283171 |
+
"loss": 0.309,
|
| 283172 |
+
"step": 144470
|
| 283173 |
+
},
|
| 283174 |
+
{
|
| 283175 |
+
"epoch": 1164.93,
|
| 283176 |
+
"learning_rate": 7.683069466882068e-06,
|
| 283177 |
+
"loss": 1.1332,
|
| 283178 |
+
"step": 144475
|
| 283179 |
+
},
|
| 283180 |
+
{
|
| 283181 |
+
"epoch": 1164.97,
|
| 283182 |
+
"learning_rate": 7.682988691437804e-06,
|
| 283183 |
+
"loss": 0.5342,
|
| 283184 |
+
"step": 144480
|
| 283185 |
+
},
|
| 283186 |
+
{
|
| 283187 |
+
"epoch": 1165.0,
|
| 283188 |
+
"eval_loss": 0.3511020839214325,
|
| 283189 |
+
"eval_runtime": 41.8487,
|
| 283190 |
+
"eval_samples_per_second": 20.001,
|
| 283191 |
+
"eval_steps_per_second": 0.645,
|
| 283192 |
+
"eval_wer": 0.1758169934640523,
|
| 283193 |
+
"step": 144483
|
| 283194 |
}
|
| 283195 |
],
|
| 283196 |
+
"max_steps": 620000,
|
| 283197 |
"num_train_epochs": 5000,
|
| 283198 |
+
"total_flos": 4.0657073017058637e+20,
|
| 283199 |
"trial_name": null,
|
| 283200 |
"trial_params": null
|
| 283201 |
}
|
model-bin/finetune/base/{checkpoint-143240 β checkpoint-144483}/training_args.bin
RENAMED
|
File without changes
|
model-bin/finetune/base/{checkpoint-143987 β checkpoint-144607}/config.json
RENAMED
|
File without changes
|
model-bin/finetune/base/{checkpoint-143987 β checkpoint-144607}/optimizer.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 722165393
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3a31d12d3a40189fee341d3b76a0101b8db55ecb439c3e3c343c228cca78970
|
| 3 |
size 722165393
|
model-bin/finetune/base/{checkpoint-143987 β checkpoint-144607}/preprocessor_config.json
RENAMED
|
File without changes
|
model-bin/finetune/base/{checkpoint-143240 β checkpoint-144607}/pytorch_model.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 377909911
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4b2684ba7a14d445d60ddc5b89de28c115fbf6a519531d67afceb91c7989d83
|
| 3 |
size 377909911
|
model-bin/finetune/base/{checkpoint-143240 β checkpoint-144607}/rng_state.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ffc499edbd4cc6010e711c1416b1e31cba07b00be8cbc3ad58f76d89ac022865
|
| 3 |
size 14503
|
model-bin/finetune/base/{checkpoint-143240 β checkpoint-144607}/scaler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1c2d2c4d17da49428ebd8e004bc9f986c0670c2986e3bc7cf69a3da37fc6a76
|
| 3 |
size 559
|
model-bin/finetune/base/{checkpoint-143987 β checkpoint-144607}/scheduler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9f6c027f71dc4e95cbb49a9cf4b6d427b2c346d5a19937f76dd5c3aeab251f6
|
| 3 |
size 623
|
model-bin/finetune/base/{checkpoint-143240 β checkpoint-144607}/trainer_state.json
RENAMED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "./model-bin/finetune/base/checkpoint-
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -281613,11 +281613,1748 @@
|
|
| 281613 |
"eval_steps_per_second": 0.646,
|
| 281614 |
"eval_wer": 0.16819527695529718,
|
| 281615 |
"step": 143240
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281616 |
}
|
| 281617 |
],
|
| 281618 |
-
"max_steps":
|
| 281619 |
"num_train_epochs": 5000,
|
| 281620 |
-
"total_flos": 4.
|
| 281621 |
"trial_name": null,
|
| 281622 |
"trial_params": null
|
| 281623 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.16790141568253503,
|
| 3 |
+
"best_model_checkpoint": "./model-bin/finetune/base/checkpoint-144607",
|
| 4 |
+
"epoch": 1165.995983935743,
|
| 5 |
+
"global_step": 144607,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 281613 |
"eval_steps_per_second": 0.646,
|
| 281614 |
"eval_wer": 0.16819527695529718,
|
| 281615 |
"step": 143240
|
| 281616 |
+
},
|
| 281617 |
+
{
|
| 281618 |
+
"epoch": 1145.04,
|
| 281619 |
+
"learning_rate": 7.702940226171244e-06,
|
| 281620 |
+
"loss": 0.3398,
|
| 281621 |
+
"step": 143245
|
| 281622 |
+
},
|
| 281623 |
+
{
|
| 281624 |
+
"epoch": 1145.08,
|
| 281625 |
+
"learning_rate": 7.70285945072698e-06,
|
| 281626 |
+
"loss": 0.2868,
|
| 281627 |
+
"step": 143250
|
| 281628 |
+
},
|
| 281629 |
+
{
|
| 281630 |
+
"epoch": 1145.12,
|
| 281631 |
+
"learning_rate": 7.702778675282714e-06,
|
| 281632 |
+
"loss": 0.286,
|
| 281633 |
+
"step": 143255
|
| 281634 |
+
},
|
| 281635 |
+
{
|
| 281636 |
+
"epoch": 1145.16,
|
| 281637 |
+
"learning_rate": 7.70269789983845e-06,
|
| 281638 |
+
"loss": 0.4329,
|
| 281639 |
+
"step": 143260
|
| 281640 |
+
},
|
| 281641 |
+
{
|
| 281642 |
+
"epoch": 1145.2,
|
| 281643 |
+
"learning_rate": 7.702617124394184e-06,
|
| 281644 |
+
"loss": 1.2206,
|
| 281645 |
+
"step": 143265
|
| 281646 |
+
},
|
| 281647 |
+
{
|
| 281648 |
+
"epoch": 1145.24,
|
| 281649 |
+
"learning_rate": 7.70253634894992e-06,
|
| 281650 |
+
"loss": 0.3035,
|
| 281651 |
+
"step": 143270
|
| 281652 |
+
},
|
| 281653 |
+
{
|
| 281654 |
+
"epoch": 1145.28,
|
| 281655 |
+
"learning_rate": 7.702455573505654e-06,
|
| 281656 |
+
"loss": 0.2856,
|
| 281657 |
+
"step": 143275
|
| 281658 |
+
},
|
| 281659 |
+
{
|
| 281660 |
+
"epoch": 1145.32,
|
| 281661 |
+
"learning_rate": 7.70237479806139e-06,
|
| 281662 |
+
"loss": 0.3347,
|
| 281663 |
+
"step": 143280
|
| 281664 |
+
},
|
| 281665 |
+
{
|
| 281666 |
+
"epoch": 1145.36,
|
| 281667 |
+
"learning_rate": 7.702294022617126e-06,
|
| 281668 |
+
"loss": 0.4613,
|
| 281669 |
+
"step": 143285
|
| 281670 |
+
},
|
| 281671 |
+
{
|
| 281672 |
+
"epoch": 1145.4,
|
| 281673 |
+
"learning_rate": 7.70221324717286e-06,
|
| 281674 |
+
"loss": 1.0676,
|
| 281675 |
+
"step": 143290
|
| 281676 |
+
},
|
| 281677 |
+
{
|
| 281678 |
+
"epoch": 1145.44,
|
| 281679 |
+
"learning_rate": 7.702132471728596e-06,
|
| 281680 |
+
"loss": 0.3332,
|
| 281681 |
+
"step": 143295
|
| 281682 |
+
},
|
| 281683 |
+
{
|
| 281684 |
+
"epoch": 1145.48,
|
| 281685 |
+
"learning_rate": 7.70205169628433e-06,
|
| 281686 |
+
"loss": 0.2514,
|
| 281687 |
+
"step": 143300
|
| 281688 |
+
},
|
| 281689 |
+
{
|
| 281690 |
+
"epoch": 1145.52,
|
| 281691 |
+
"learning_rate": 7.701970920840066e-06,
|
| 281692 |
+
"loss": 0.2744,
|
| 281693 |
+
"step": 143305
|
| 281694 |
+
},
|
| 281695 |
+
{
|
| 281696 |
+
"epoch": 1145.56,
|
| 281697 |
+
"learning_rate": 7.7018901453958e-06,
|
| 281698 |
+
"loss": 0.3931,
|
| 281699 |
+
"step": 143310
|
| 281700 |
+
},
|
| 281701 |
+
{
|
| 281702 |
+
"epoch": 1145.6,
|
| 281703 |
+
"learning_rate": 7.701809369951536e-06,
|
| 281704 |
+
"loss": 1.015,
|
| 281705 |
+
"step": 143315
|
| 281706 |
+
},
|
| 281707 |
+
{
|
| 281708 |
+
"epoch": 1145.64,
|
| 281709 |
+
"learning_rate": 7.70172859450727e-06,
|
| 281710 |
+
"loss": 0.3073,
|
| 281711 |
+
"step": 143320
|
| 281712 |
+
},
|
| 281713 |
+
{
|
| 281714 |
+
"epoch": 1145.68,
|
| 281715 |
+
"learning_rate": 7.701647819063006e-06,
|
| 281716 |
+
"loss": 0.3129,
|
| 281717 |
+
"step": 143325
|
| 281718 |
+
},
|
| 281719 |
+
{
|
| 281720 |
+
"epoch": 1145.72,
|
| 281721 |
+
"learning_rate": 7.70156704361874e-06,
|
| 281722 |
+
"loss": 0.2667,
|
| 281723 |
+
"step": 143330
|
| 281724 |
+
},
|
| 281725 |
+
{
|
| 281726 |
+
"epoch": 1145.76,
|
| 281727 |
+
"learning_rate": 7.701486268174476e-06,
|
| 281728 |
+
"loss": 0.4446,
|
| 281729 |
+
"step": 143335
|
| 281730 |
+
},
|
| 281731 |
+
{
|
| 281732 |
+
"epoch": 1145.8,
|
| 281733 |
+
"learning_rate": 7.70140549273021e-06,
|
| 281734 |
+
"loss": 1.2056,
|
| 281735 |
+
"step": 143340
|
| 281736 |
+
},
|
| 281737 |
+
{
|
| 281738 |
+
"epoch": 1145.84,
|
| 281739 |
+
"learning_rate": 7.701324717285946e-06,
|
| 281740 |
+
"loss": 0.2746,
|
| 281741 |
+
"step": 143345
|
| 281742 |
+
},
|
| 281743 |
+
{
|
| 281744 |
+
"epoch": 1145.88,
|
| 281745 |
+
"learning_rate": 7.701243941841682e-06,
|
| 281746 |
+
"loss": 0.277,
|
| 281747 |
+
"step": 143350
|
| 281748 |
+
},
|
| 281749 |
+
{
|
| 281750 |
+
"epoch": 1145.92,
|
| 281751 |
+
"learning_rate": 7.701163166397416e-06,
|
| 281752 |
+
"loss": 0.3012,
|
| 281753 |
+
"step": 143355
|
| 281754 |
+
},
|
| 281755 |
+
{
|
| 281756 |
+
"epoch": 1145.96,
|
| 281757 |
+
"learning_rate": 7.701082390953151e-06,
|
| 281758 |
+
"loss": 0.415,
|
| 281759 |
+
"step": 143360
|
| 281760 |
+
},
|
| 281761 |
+
{
|
| 281762 |
+
"epoch": 1146.0,
|
| 281763 |
+
"learning_rate": 7.701001615508886e-06,
|
| 281764 |
+
"loss": 1.132,
|
| 281765 |
+
"step": 143365
|
| 281766 |
+
},
|
| 281767 |
+
{
|
| 281768 |
+
"epoch": 1146.0,
|
| 281769 |
+
"eval_loss": 0.32241275906562805,
|
| 281770 |
+
"eval_runtime": 41.468,
|
| 281771 |
+
"eval_samples_per_second": 20.208,
|
| 281772 |
+
"eval_steps_per_second": 0.651,
|
| 281773 |
+
"eval_wer": 0.18056155507559396,
|
| 281774 |
+
"step": 143365
|
| 281775 |
+
},
|
| 281776 |
+
{
|
| 281777 |
+
"epoch": 1156.04,
|
| 281778 |
+
"learning_rate": 7.700920840064621e-06,
|
| 281779 |
+
"loss": 0.2962,
|
| 281780 |
+
"step": 143370
|
| 281781 |
+
},
|
| 281782 |
+
{
|
| 281783 |
+
"epoch": 1156.08,
|
| 281784 |
+
"learning_rate": 7.700840064620356e-06,
|
| 281785 |
+
"loss": 0.2587,
|
| 281786 |
+
"step": 143375
|
| 281787 |
+
},
|
| 281788 |
+
{
|
| 281789 |
+
"epoch": 1156.12,
|
| 281790 |
+
"learning_rate": 7.700759289176091e-06,
|
| 281791 |
+
"loss": 0.3334,
|
| 281792 |
+
"step": 143380
|
| 281793 |
+
},
|
| 281794 |
+
{
|
| 281795 |
+
"epoch": 1156.16,
|
| 281796 |
+
"learning_rate": 7.700678513731826e-06,
|
| 281797 |
+
"loss": 0.3843,
|
| 281798 |
+
"step": 143385
|
| 281799 |
+
},
|
| 281800 |
+
{
|
| 281801 |
+
"epoch": 1156.2,
|
| 281802 |
+
"learning_rate": 7.700597738287561e-06,
|
| 281803 |
+
"loss": 1.2568,
|
| 281804 |
+
"step": 143390
|
| 281805 |
+
},
|
| 281806 |
+
{
|
| 281807 |
+
"epoch": 1156.24,
|
| 281808 |
+
"learning_rate": 7.700516962843296e-06,
|
| 281809 |
+
"loss": 0.3173,
|
| 281810 |
+
"step": 143395
|
| 281811 |
+
},
|
| 281812 |
+
{
|
| 281813 |
+
"epoch": 1156.28,
|
| 281814 |
+
"learning_rate": 7.700436187399031e-06,
|
| 281815 |
+
"loss": 0.2457,
|
| 281816 |
+
"step": 143400
|
| 281817 |
+
},
|
| 281818 |
+
{
|
| 281819 |
+
"epoch": 1156.32,
|
| 281820 |
+
"learning_rate": 7.700355411954766e-06,
|
| 281821 |
+
"loss": 0.3265,
|
| 281822 |
+
"step": 143405
|
| 281823 |
+
},
|
| 281824 |
+
{
|
| 281825 |
+
"epoch": 1156.36,
|
| 281826 |
+
"learning_rate": 7.700274636510501e-06,
|
| 281827 |
+
"loss": 0.4942,
|
| 281828 |
+
"step": 143410
|
| 281829 |
+
},
|
| 281830 |
+
{
|
| 281831 |
+
"epoch": 1156.4,
|
| 281832 |
+
"learning_rate": 7.700193861066237e-06,
|
| 281833 |
+
"loss": 1.1454,
|
| 281834 |
+
"step": 143415
|
| 281835 |
+
},
|
| 281836 |
+
{
|
| 281837 |
+
"epoch": 1156.44,
|
| 281838 |
+
"learning_rate": 7.700113085621971e-06,
|
| 281839 |
+
"loss": 0.3211,
|
| 281840 |
+
"step": 143420
|
| 281841 |
+
},
|
| 281842 |
+
{
|
| 281843 |
+
"epoch": 1156.48,
|
| 281844 |
+
"learning_rate": 7.700032310177707e-06,
|
| 281845 |
+
"loss": 0.2822,
|
| 281846 |
+
"step": 143425
|
| 281847 |
+
},
|
| 281848 |
+
{
|
| 281849 |
+
"epoch": 1156.52,
|
| 281850 |
+
"learning_rate": 7.699951534733441e-06,
|
| 281851 |
+
"loss": 0.3959,
|
| 281852 |
+
"step": 143430
|
| 281853 |
+
},
|
| 281854 |
+
{
|
| 281855 |
+
"epoch": 1156.56,
|
| 281856 |
+
"learning_rate": 7.699870759289177e-06,
|
| 281857 |
+
"loss": 0.4006,
|
| 281858 |
+
"step": 143435
|
| 281859 |
+
},
|
| 281860 |
+
{
|
| 281861 |
+
"epoch": 1156.6,
|
| 281862 |
+
"learning_rate": 7.699789983844911e-06,
|
| 281863 |
+
"loss": 1.0403,
|
| 281864 |
+
"step": 143440
|
| 281865 |
+
},
|
| 281866 |
+
{
|
| 281867 |
+
"epoch": 1156.65,
|
| 281868 |
+
"learning_rate": 7.699709208400647e-06,
|
| 281869 |
+
"loss": 0.3076,
|
| 281870 |
+
"step": 143445
|
| 281871 |
+
},
|
| 281872 |
+
{
|
| 281873 |
+
"epoch": 1156.69,
|
| 281874 |
+
"learning_rate": 7.699628432956381e-06,
|
| 281875 |
+
"loss": 0.3066,
|
| 281876 |
+
"step": 143450
|
| 281877 |
+
},
|
| 281878 |
+
{
|
| 281879 |
+
"epoch": 1156.73,
|
| 281880 |
+
"learning_rate": 7.699547657512117e-06,
|
| 281881 |
+
"loss": 0.3324,
|
| 281882 |
+
"step": 143455
|
| 281883 |
+
},
|
| 281884 |
+
{
|
| 281885 |
+
"epoch": 1156.77,
|
| 281886 |
+
"learning_rate": 7.699466882067851e-06,
|
| 281887 |
+
"loss": 0.441,
|
| 281888 |
+
"step": 143460
|
| 281889 |
+
},
|
| 281890 |
+
{
|
| 281891 |
+
"epoch": 1156.81,
|
| 281892 |
+
"learning_rate": 7.699386106623587e-06,
|
| 281893 |
+
"loss": 1.2269,
|
| 281894 |
+
"step": 143465
|
| 281895 |
+
},
|
| 281896 |
+
{
|
| 281897 |
+
"epoch": 1156.85,
|
| 281898 |
+
"learning_rate": 7.699305331179321e-06,
|
| 281899 |
+
"loss": 0.2736,
|
| 281900 |
+
"step": 143470
|
| 281901 |
+
},
|
| 281902 |
+
{
|
| 281903 |
+
"epoch": 1156.89,
|
| 281904 |
+
"learning_rate": 7.699224555735057e-06,
|
| 281905 |
+
"loss": 0.2975,
|
| 281906 |
+
"step": 143475
|
| 281907 |
+
},
|
| 281908 |
+
{
|
| 281909 |
+
"epoch": 1156.93,
|
| 281910 |
+
"learning_rate": 7.699143780290791e-06,
|
| 281911 |
+
"loss": 0.2988,
|
| 281912 |
+
"step": 143480
|
| 281913 |
+
},
|
| 281914 |
+
{
|
| 281915 |
+
"epoch": 1156.97,
|
| 281916 |
+
"learning_rate": 7.699063004846527e-06,
|
| 281917 |
+
"loss": 0.4613,
|
| 281918 |
+
"step": 143485
|
| 281919 |
+
},
|
| 281920 |
+
{
|
| 281921 |
+
"epoch": 1157.0,
|
| 281922 |
+
"eval_loss": 0.3576335608959198,
|
| 281923 |
+
"eval_runtime": 41.3609,
|
| 281924 |
+
"eval_samples_per_second": 20.236,
|
| 281925 |
+
"eval_steps_per_second": 0.653,
|
| 281926 |
+
"eval_wer": 0.1753503413582465,
|
| 281927 |
+
"step": 143489
|
| 281928 |
+
},
|
| 281929 |
+
{
|
| 281930 |
+
"epoch": 1157.01,
|
| 281931 |
+
"learning_rate": 7.698982229402263e-06,
|
| 281932 |
+
"loss": 0.3329,
|
| 281933 |
+
"step": 143490
|
| 281934 |
+
},
|
| 281935 |
+
{
|
| 281936 |
+
"epoch": 1157.05,
|
| 281937 |
+
"learning_rate": 7.698901453957997e-06,
|
| 281938 |
+
"loss": 0.288,
|
| 281939 |
+
"step": 143495
|
| 281940 |
+
},
|
| 281941 |
+
{
|
| 281942 |
+
"epoch": 1157.09,
|
| 281943 |
+
"learning_rate": 7.698820678513733e-06,
|
| 281944 |
+
"loss": 0.3328,
|
| 281945 |
+
"step": 143500
|
| 281946 |
+
},
|
| 281947 |
+
{
|
| 281948 |
+
"epoch": 1157.13,
|
| 281949 |
+
"learning_rate": 7.698739903069467e-06,
|
| 281950 |
+
"loss": 0.3706,
|
| 281951 |
+
"step": 143505
|
| 281952 |
+
},
|
| 281953 |
+
{
|
| 281954 |
+
"epoch": 1157.17,
|
| 281955 |
+
"learning_rate": 7.698659127625203e-06,
|
| 281956 |
+
"loss": 0.5247,
|
| 281957 |
+
"step": 143510
|
| 281958 |
+
},
|
| 281959 |
+
{
|
| 281960 |
+
"epoch": 1157.21,
|
| 281961 |
+
"learning_rate": 7.698578352180937e-06,
|
| 281962 |
+
"loss": 1.1031,
|
| 281963 |
+
"step": 143515
|
| 281964 |
+
},
|
| 281965 |
+
{
|
| 281966 |
+
"epoch": 1157.25,
|
| 281967 |
+
"learning_rate": 7.698497576736673e-06,
|
| 281968 |
+
"loss": 0.3005,
|
| 281969 |
+
"step": 143520
|
| 281970 |
+
},
|
| 281971 |
+
{
|
| 281972 |
+
"epoch": 1157.29,
|
| 281973 |
+
"learning_rate": 7.698416801292407e-06,
|
| 281974 |
+
"loss": 0.3484,
|
| 281975 |
+
"step": 143525
|
| 281976 |
+
},
|
| 281977 |
+
{
|
| 281978 |
+
"epoch": 1157.33,
|
| 281979 |
+
"learning_rate": 7.698336025848143e-06,
|
| 281980 |
+
"loss": 0.2836,
|
| 281981 |
+
"step": 143530
|
| 281982 |
+
},
|
| 281983 |
+
{
|
| 281984 |
+
"epoch": 1157.37,
|
| 281985 |
+
"learning_rate": 7.698255250403877e-06,
|
| 281986 |
+
"loss": 0.5206,
|
| 281987 |
+
"step": 143535
|
| 281988 |
+
},
|
| 281989 |
+
{
|
| 281990 |
+
"epoch": 1157.41,
|
| 281991 |
+
"learning_rate": 7.698174474959613e-06,
|
| 281992 |
+
"loss": 1.0886,
|
| 281993 |
+
"step": 143540
|
| 281994 |
+
},
|
| 281995 |
+
{
|
| 281996 |
+
"epoch": 1157.45,
|
| 281997 |
+
"learning_rate": 7.698093699515347e-06,
|
| 281998 |
+
"loss": 0.3511,
|
| 281999 |
+
"step": 143545
|
| 282000 |
+
},
|
| 282001 |
+
{
|
| 282002 |
+
"epoch": 1157.49,
|
| 282003 |
+
"learning_rate": 7.698012924071083e-06,
|
| 282004 |
+
"loss": 0.286,
|
| 282005 |
+
"step": 143550
|
| 282006 |
+
},
|
| 282007 |
+
{
|
| 282008 |
+
"epoch": 1157.53,
|
| 282009 |
+
"learning_rate": 7.697932148626819e-06,
|
| 282010 |
+
"loss": 0.3322,
|
| 282011 |
+
"step": 143555
|
| 282012 |
+
},
|
| 282013 |
+
{
|
| 282014 |
+
"epoch": 1157.57,
|
| 282015 |
+
"learning_rate": 7.697851373182553e-06,
|
| 282016 |
+
"loss": 0.5271,
|
| 282017 |
+
"step": 143560
|
| 282018 |
+
},
|
| 282019 |
+
{
|
| 282020 |
+
"epoch": 1157.61,
|
| 282021 |
+
"learning_rate": 7.697770597738289e-06,
|
| 282022 |
+
"loss": 1.0804,
|
| 282023 |
+
"step": 143565
|
| 282024 |
+
},
|
| 282025 |
+
{
|
| 282026 |
+
"epoch": 1157.65,
|
| 282027 |
+
"learning_rate": 7.697689822294023e-06,
|
| 282028 |
+
"loss": 0.2825,
|
| 282029 |
+
"step": 143570
|
| 282030 |
+
},
|
| 282031 |
+
{
|
| 282032 |
+
"epoch": 1157.69,
|
| 282033 |
+
"learning_rate": 7.697609046849758e-06,
|
| 282034 |
+
"loss": 0.2583,
|
| 282035 |
+
"step": 143575
|
| 282036 |
+
},
|
| 282037 |
+
{
|
| 282038 |
+
"epoch": 1157.73,
|
| 282039 |
+
"learning_rate": 7.697528271405493e-06,
|
| 282040 |
+
"loss": 0.3537,
|
| 282041 |
+
"step": 143580
|
| 282042 |
+
},
|
| 282043 |
+
{
|
| 282044 |
+
"epoch": 1157.77,
|
| 282045 |
+
"learning_rate": 7.697447495961228e-06,
|
| 282046 |
+
"loss": 0.5304,
|
| 282047 |
+
"step": 143585
|
| 282048 |
+
},
|
| 282049 |
+
{
|
| 282050 |
+
"epoch": 1157.81,
|
| 282051 |
+
"learning_rate": 7.697366720516963e-06,
|
| 282052 |
+
"loss": 1.2032,
|
| 282053 |
+
"step": 143590
|
| 282054 |
+
},
|
| 282055 |
+
{
|
| 282056 |
+
"epoch": 1157.85,
|
| 282057 |
+
"learning_rate": 7.697285945072698e-06,
|
| 282058 |
+
"loss": 0.3729,
|
| 282059 |
+
"step": 143595
|
| 282060 |
+
},
|
| 282061 |
+
{
|
| 282062 |
+
"epoch": 1157.9,
|
| 282063 |
+
"learning_rate": 7.697205169628433e-06,
|
| 282064 |
+
"loss": 0.291,
|
| 282065 |
+
"step": 143600
|
| 282066 |
+
},
|
| 282067 |
+
{
|
| 282068 |
+
"epoch": 1157.94,
|
| 282069 |
+
"learning_rate": 7.697124394184168e-06,
|
| 282070 |
+
"loss": 0.2936,
|
| 282071 |
+
"step": 143605
|
| 282072 |
+
},
|
| 282073 |
+
{
|
| 282074 |
+
"epoch": 1157.98,
|
| 282075 |
+
"learning_rate": 7.697043618739903e-06,
|
| 282076 |
+
"loss": 0.5296,
|
| 282077 |
+
"step": 143610
|
| 282078 |
+
},
|
| 282079 |
+
{
|
| 282080 |
+
"epoch": 1158.0,
|
| 282081 |
+
"eval_loss": 0.39611005783081055,
|
| 282082 |
+
"eval_runtime": 40.5454,
|
| 282083 |
+
"eval_samples_per_second": 20.644,
|
| 282084 |
+
"eval_steps_per_second": 0.666,
|
| 282085 |
+
"eval_wer": 0.18108326596604687,
|
| 282086 |
+
"step": 143613
|
| 282087 |
+
},
|
| 282088 |
+
{
|
| 282089 |
+
"epoch": 1158.02,
|
| 282090 |
+
"learning_rate": 7.696962843295638e-06,
|
| 282091 |
+
"loss": 0.3619,
|
| 282092 |
+
"step": 143615
|
| 282093 |
+
},
|
| 282094 |
+
{
|
| 282095 |
+
"epoch": 1158.06,
|
| 282096 |
+
"learning_rate": 7.696882067851374e-06,
|
| 282097 |
+
"loss": 0.3366,
|
| 282098 |
+
"step": 143620
|
| 282099 |
+
},
|
| 282100 |
+
{
|
| 282101 |
+
"epoch": 1158.1,
|
| 282102 |
+
"learning_rate": 7.696801292407108e-06,
|
| 282103 |
+
"loss": 0.2358,
|
| 282104 |
+
"step": 143625
|
| 282105 |
+
},
|
| 282106 |
+
{
|
| 282107 |
+
"epoch": 1158.14,
|
| 282108 |
+
"learning_rate": 7.696720516962844e-06,
|
| 282109 |
+
"loss": 0.3511,
|
| 282110 |
+
"step": 143630
|
| 282111 |
+
},
|
| 282112 |
+
{
|
| 282113 |
+
"epoch": 1158.18,
|
| 282114 |
+
"learning_rate": 7.696639741518578e-06,
|
| 282115 |
+
"loss": 0.493,
|
| 282116 |
+
"step": 143635
|
| 282117 |
+
},
|
| 282118 |
+
{
|
| 282119 |
+
"epoch": 1158.22,
|
| 282120 |
+
"learning_rate": 7.696558966074314e-06,
|
| 282121 |
+
"loss": 0.991,
|
| 282122 |
+
"step": 143640
|
| 282123 |
+
},
|
| 282124 |
+
{
|
| 282125 |
+
"epoch": 1158.26,
|
| 282126 |
+
"learning_rate": 7.696478190630048e-06,
|
| 282127 |
+
"loss": 0.3034,
|
| 282128 |
+
"step": 143645
|
| 282129 |
+
},
|
| 282130 |
+
{
|
| 282131 |
+
"epoch": 1158.3,
|
| 282132 |
+
"learning_rate": 7.696397415185784e-06,
|
| 282133 |
+
"loss": 0.314,
|
| 282134 |
+
"step": 143650
|
| 282135 |
+
},
|
| 282136 |
+
{
|
| 282137 |
+
"epoch": 1158.34,
|
| 282138 |
+
"learning_rate": 7.696316639741518e-06,
|
| 282139 |
+
"loss": 0.3291,
|
| 282140 |
+
"step": 143655
|
| 282141 |
+
},
|
| 282142 |
+
{
|
| 282143 |
+
"epoch": 1158.38,
|
| 282144 |
+
"learning_rate": 7.696235864297254e-06,
|
| 282145 |
+
"loss": 0.693,
|
| 282146 |
+
"step": 143660
|
| 282147 |
+
},
|
| 282148 |
+
{
|
| 282149 |
+
"epoch": 1158.42,
|
| 282150 |
+
"learning_rate": 7.696155088852988e-06,
|
| 282151 |
+
"loss": 0.9516,
|
| 282152 |
+
"step": 143665
|
| 282153 |
+
},
|
| 282154 |
+
{
|
| 282155 |
+
"epoch": 1158.46,
|
| 282156 |
+
"learning_rate": 7.696074313408724e-06,
|
| 282157 |
+
"loss": 0.2922,
|
| 282158 |
+
"step": 143670
|
| 282159 |
+
},
|
| 282160 |
+
{
|
| 282161 |
+
"epoch": 1158.5,
|
| 282162 |
+
"learning_rate": 7.695993537964458e-06,
|
| 282163 |
+
"loss": 0.2584,
|
| 282164 |
+
"step": 143675
|
| 282165 |
+
},
|
| 282166 |
+
{
|
| 282167 |
+
"epoch": 1158.54,
|
| 282168 |
+
"learning_rate": 7.695912762520194e-06,
|
| 282169 |
+
"loss": 0.313,
|
| 282170 |
+
"step": 143680
|
| 282171 |
+
},
|
| 282172 |
+
{
|
| 282173 |
+
"epoch": 1158.58,
|
| 282174 |
+
"learning_rate": 7.695831987075928e-06,
|
| 282175 |
+
"loss": 0.5341,
|
| 282176 |
+
"step": 143685
|
| 282177 |
+
},
|
| 282178 |
+
{
|
| 282179 |
+
"epoch": 1158.62,
|
| 282180 |
+
"learning_rate": 7.695751211631664e-06,
|
| 282181 |
+
"loss": 1.107,
|
| 282182 |
+
"step": 143690
|
| 282183 |
+
},
|
| 282184 |
+
{
|
| 282185 |
+
"epoch": 1158.66,
|
| 282186 |
+
"learning_rate": 7.6956704361874e-06,
|
| 282187 |
+
"loss": 0.4665,
|
| 282188 |
+
"step": 143695
|
| 282189 |
+
},
|
| 282190 |
+
{
|
| 282191 |
+
"epoch": 1158.7,
|
| 282192 |
+
"learning_rate": 7.695589660743134e-06,
|
| 282193 |
+
"loss": 0.2928,
|
| 282194 |
+
"step": 143700
|
| 282195 |
+
},
|
| 282196 |
+
{
|
| 282197 |
+
"epoch": 1158.74,
|
| 282198 |
+
"learning_rate": 7.69550888529887e-06,
|
| 282199 |
+
"loss": 0.3576,
|
| 282200 |
+
"step": 143705
|
| 282201 |
+
},
|
| 282202 |
+
{
|
| 282203 |
+
"epoch": 1158.78,
|
| 282204 |
+
"learning_rate": 7.695428109854604e-06,
|
| 282205 |
+
"loss": 0.6097,
|
| 282206 |
+
"step": 143710
|
| 282207 |
+
},
|
| 282208 |
+
{
|
| 282209 |
+
"epoch": 1158.82,
|
| 282210 |
+
"learning_rate": 7.69534733441034e-06,
|
| 282211 |
+
"loss": 0.9703,
|
| 282212 |
+
"step": 143715
|
| 282213 |
+
},
|
| 282214 |
+
{
|
| 282215 |
+
"epoch": 1158.86,
|
| 282216 |
+
"learning_rate": 7.695266558966074e-06,
|
| 282217 |
+
"loss": 0.2794,
|
| 282218 |
+
"step": 143720
|
| 282219 |
+
},
|
| 282220 |
+
{
|
| 282221 |
+
"epoch": 1158.9,
|
| 282222 |
+
"learning_rate": 7.69518578352181e-06,
|
| 282223 |
+
"loss": 0.2705,
|
| 282224 |
+
"step": 143725
|
| 282225 |
+
},
|
| 282226 |
+
{
|
| 282227 |
+
"epoch": 1158.94,
|
| 282228 |
+
"learning_rate": 7.695105008077544e-06,
|
| 282229 |
+
"loss": 0.299,
|
| 282230 |
+
"step": 143730
|
| 282231 |
+
},
|
| 282232 |
+
{
|
| 282233 |
+
"epoch": 1158.98,
|
| 282234 |
+
"learning_rate": 7.69502423263328e-06,
|
| 282235 |
+
"loss": 0.7515,
|
| 282236 |
+
"step": 143735
|
| 282237 |
+
},
|
| 282238 |
+
{
|
| 282239 |
+
"epoch": 1159.0,
|
| 282240 |
+
"eval_loss": 0.38360095024108887,
|
| 282241 |
+
"eval_runtime": 40.7226,
|
| 282242 |
+
"eval_samples_per_second": 20.554,
|
| 282243 |
+
"eval_steps_per_second": 0.663,
|
| 282244 |
+
"eval_wer": 0.1902169915410077,
|
| 282245 |
+
"step": 143737
|
| 282246 |
+
},
|
| 282247 |
+
{
|
| 282248 |
+
"epoch": 1149.02,
|
| 282249 |
+
"learning_rate": 7.694943457189014e-06,
|
| 282250 |
+
"loss": 0.3411,
|
| 282251 |
+
"step": 143740
|
| 282252 |
+
},
|
| 282253 |
+
{
|
| 282254 |
+
"epoch": 1149.06,
|
| 282255 |
+
"learning_rate": 7.69486268174475e-06,
|
| 282256 |
+
"loss": 0.2825,
|
| 282257 |
+
"step": 143745
|
| 282258 |
+
},
|
| 282259 |
+
{
|
| 282260 |
+
"epoch": 1149.1,
|
| 282261 |
+
"learning_rate": 7.694781906300486e-06,
|
| 282262 |
+
"loss": 0.3384,
|
| 282263 |
+
"step": 143750
|
| 282264 |
+
},
|
| 282265 |
+
{
|
| 282266 |
+
"epoch": 1149.14,
|
| 282267 |
+
"learning_rate": 7.69470113085622e-06,
|
| 282268 |
+
"loss": 0.3251,
|
| 282269 |
+
"step": 143755
|
| 282270 |
+
},
|
| 282271 |
+
{
|
| 282272 |
+
"epoch": 1149.18,
|
| 282273 |
+
"learning_rate": 7.694620355411956e-06,
|
| 282274 |
+
"loss": 0.6553,
|
| 282275 |
+
"step": 143760
|
| 282276 |
+
},
|
| 282277 |
+
{
|
| 282278 |
+
"epoch": 1149.22,
|
| 282279 |
+
"learning_rate": 7.69453957996769e-06,
|
| 282280 |
+
"loss": 0.8217,
|
| 282281 |
+
"step": 143765
|
| 282282 |
+
},
|
| 282283 |
+
{
|
| 282284 |
+
"epoch": 1149.26,
|
| 282285 |
+
"learning_rate": 7.694458804523426e-06,
|
| 282286 |
+
"loss": 0.3223,
|
| 282287 |
+
"step": 143770
|
| 282288 |
+
},
|
| 282289 |
+
{
|
| 282290 |
+
"epoch": 1149.3,
|
| 282291 |
+
"learning_rate": 7.69437802907916e-06,
|
| 282292 |
+
"loss": 0.2594,
|
| 282293 |
+
"step": 143775
|
| 282294 |
+
},
|
| 282295 |
+
{
|
| 282296 |
+
"epoch": 1149.34,
|
| 282297 |
+
"learning_rate": 7.694297253634896e-06,
|
| 282298 |
+
"loss": 0.3364,
|
| 282299 |
+
"step": 143780
|
| 282300 |
+
},
|
| 282301 |
+
{
|
| 282302 |
+
"epoch": 1149.38,
|
| 282303 |
+
"learning_rate": 7.69421647819063e-06,
|
| 282304 |
+
"loss": 0.7022,
|
| 282305 |
+
"step": 143785
|
| 282306 |
+
},
|
| 282307 |
+
{
|
| 282308 |
+
"epoch": 1149.42,
|
| 282309 |
+
"learning_rate": 7.694135702746365e-06,
|
| 282310 |
+
"loss": 0.7457,
|
| 282311 |
+
"step": 143790
|
| 282312 |
+
},
|
| 282313 |
+
{
|
| 282314 |
+
"epoch": 1149.46,
|
| 282315 |
+
"learning_rate": 7.6940549273021e-06,
|
| 282316 |
+
"loss": 0.2573,
|
| 282317 |
+
"step": 143795
|
| 282318 |
+
},
|
| 282319 |
+
{
|
| 282320 |
+
"epoch": 1149.5,
|
| 282321 |
+
"learning_rate": 7.693974151857835e-06,
|
| 282322 |
+
"loss": 0.2289,
|
| 282323 |
+
"step": 143800
|
| 282324 |
+
},
|
| 282325 |
+
{
|
| 282326 |
+
"epoch": 1149.54,
|
| 282327 |
+
"learning_rate": 7.69389337641357e-06,
|
| 282328 |
+
"loss": 0.3103,
|
| 282329 |
+
"step": 143805
|
| 282330 |
+
},
|
| 282331 |
+
{
|
| 282332 |
+
"epoch": 1149.58,
|
| 282333 |
+
"learning_rate": 7.693812600969305e-06,
|
| 282334 |
+
"loss": 0.6486,
|
| 282335 |
+
"step": 143810
|
| 282336 |
+
},
|
| 282337 |
+
{
|
| 282338 |
+
"epoch": 1149.62,
|
| 282339 |
+
"learning_rate": 7.693731825525041e-06,
|
| 282340 |
+
"loss": 0.7897,
|
| 282341 |
+
"step": 143815
|
| 282342 |
+
},
|
| 282343 |
+
{
|
| 282344 |
+
"epoch": 1149.66,
|
| 282345 |
+
"learning_rate": 7.693651050080775e-06,
|
| 282346 |
+
"loss": 0.2758,
|
| 282347 |
+
"step": 143820
|
| 282348 |
+
},
|
| 282349 |
+
{
|
| 282350 |
+
"epoch": 1149.7,
|
| 282351 |
+
"learning_rate": 7.693570274636511e-06,
|
| 282352 |
+
"loss": 0.2628,
|
| 282353 |
+
"step": 143825
|
| 282354 |
+
},
|
| 282355 |
+
{
|
| 282356 |
+
"epoch": 1149.74,
|
| 282357 |
+
"learning_rate": 7.693489499192245e-06,
|
| 282358 |
+
"loss": 0.4422,
|
| 282359 |
+
"step": 143830
|
| 282360 |
+
},
|
| 282361 |
+
{
|
| 282362 |
+
"epoch": 1149.78,
|
| 282363 |
+
"learning_rate": 7.693408723747981e-06,
|
| 282364 |
+
"loss": 0.7036,
|
| 282365 |
+
"step": 143835
|
| 282366 |
+
},
|
| 282367 |
+
{
|
| 282368 |
+
"epoch": 1149.82,
|
| 282369 |
+
"learning_rate": 7.693327948303715e-06,
|
| 282370 |
+
"loss": 0.7535,
|
| 282371 |
+
"step": 143840
|
| 282372 |
+
},
|
| 282373 |
+
{
|
| 282374 |
+
"epoch": 1149.86,
|
| 282375 |
+
"learning_rate": 7.693247172859451e-06,
|
| 282376 |
+
"loss": 0.2703,
|
| 282377 |
+
"step": 143845
|
| 282378 |
+
},
|
| 282379 |
+
{
|
| 282380 |
+
"epoch": 1149.9,
|
| 282381 |
+
"learning_rate": 7.693166397415185e-06,
|
| 282382 |
+
"loss": 0.3293,
|
| 282383 |
+
"step": 143850
|
| 282384 |
+
},
|
| 282385 |
+
{
|
| 282386 |
+
"epoch": 1149.94,
|
| 282387 |
+
"learning_rate": 7.693085621970921e-06,
|
| 282388 |
+
"loss": 0.2901,
|
| 282389 |
+
"step": 143855
|
| 282390 |
+
},
|
| 282391 |
+
{
|
| 282392 |
+
"epoch": 1149.98,
|
| 282393 |
+
"learning_rate": 7.693004846526655e-06,
|
| 282394 |
+
"loss": 0.7575,
|
| 282395 |
+
"step": 143860
|
| 282396 |
+
},
|
| 282397 |
+
{
|
| 282398 |
+
"epoch": 1150.0,
|
| 282399 |
+
"eval_loss": 0.330931693315506,
|
| 282400 |
+
"eval_runtime": 40.935,
|
| 282401 |
+
"eval_samples_per_second": 20.447,
|
| 282402 |
+
"eval_steps_per_second": 0.66,
|
| 282403 |
+
"eval_wer": 0.18041988628079894,
|
| 282404 |
+
"step": 143862
|
| 282405 |
+
},
|
| 282406 |
+
{
|
| 282407 |
+
"epoch": 1150.02,
|
| 282408 |
+
"learning_rate": 7.692924071082391e-06,
|
| 282409 |
+
"loss": 0.4115,
|
| 282410 |
+
"step": 143865
|
| 282411 |
+
},
|
| 282412 |
+
{
|
| 282413 |
+
"epoch": 1150.06,
|
| 282414 |
+
"learning_rate": 7.692843295638127e-06,
|
| 282415 |
+
"loss": 0.2711,
|
| 282416 |
+
"step": 143870
|
| 282417 |
+
},
|
| 282418 |
+
{
|
| 282419 |
+
"epoch": 1150.1,
|
| 282420 |
+
"learning_rate": 7.692762520193861e-06,
|
| 282421 |
+
"loss": 0.3022,
|
| 282422 |
+
"step": 143875
|
| 282423 |
+
},
|
| 282424 |
+
{
|
| 282425 |
+
"epoch": 1150.14,
|
| 282426 |
+
"learning_rate": 7.692681744749597e-06,
|
| 282427 |
+
"loss": 0.3806,
|
| 282428 |
+
"step": 143880
|
| 282429 |
+
},
|
| 282430 |
+
{
|
| 282431 |
+
"epoch": 1150.18,
|
| 282432 |
+
"learning_rate": 7.692600969305331e-06,
|
| 282433 |
+
"loss": 0.7295,
|
| 282434 |
+
"step": 143885
|
| 282435 |
+
},
|
| 282436 |
+
{
|
| 282437 |
+
"epoch": 1150.22,
|
| 282438 |
+
"learning_rate": 7.692520193861067e-06,
|
| 282439 |
+
"loss": 0.7231,
|
| 282440 |
+
"step": 143890
|
| 282441 |
+
},
|
| 282442 |
+
{
|
| 282443 |
+
"epoch": 1150.26,
|
| 282444 |
+
"learning_rate": 7.692439418416801e-06,
|
| 282445 |
+
"loss": 0.2712,
|
| 282446 |
+
"step": 143895
|
| 282447 |
+
},
|
| 282448 |
+
{
|
| 282449 |
+
"epoch": 1150.3,
|
| 282450 |
+
"learning_rate": 7.692358642972537e-06,
|
| 282451 |
+
"loss": 0.3067,
|
| 282452 |
+
"step": 143900
|
| 282453 |
+
},
|
| 282454 |
+
{
|
| 282455 |
+
"epoch": 1150.34,
|
| 282456 |
+
"learning_rate": 7.692277867528271e-06,
|
| 282457 |
+
"loss": 0.361,
|
| 282458 |
+
"step": 143905
|
| 282459 |
+
},
|
| 282460 |
+
{
|
| 282461 |
+
"epoch": 1150.38,
|
| 282462 |
+
"learning_rate": 7.692197092084007e-06,
|
| 282463 |
+
"loss": 0.7103,
|
| 282464 |
+
"step": 143910
|
| 282465 |
+
},
|
| 282466 |
+
{
|
| 282467 |
+
"epoch": 1150.42,
|
| 282468 |
+
"learning_rate": 7.692116316639741e-06,
|
| 282469 |
+
"loss": 0.8117,
|
| 282470 |
+
"step": 143915
|
| 282471 |
+
},
|
| 282472 |
+
{
|
| 282473 |
+
"epoch": 1150.46,
|
| 282474 |
+
"learning_rate": 7.692035541195477e-06,
|
| 282475 |
+
"loss": 0.2792,
|
| 282476 |
+
"step": 143920
|
| 282477 |
+
},
|
| 282478 |
+
{
|
| 282479 |
+
"epoch": 1150.5,
|
| 282480 |
+
"learning_rate": 7.691954765751213e-06,
|
| 282481 |
+
"loss": 0.2409,
|
| 282482 |
+
"step": 143925
|
| 282483 |
+
},
|
| 282484 |
+
{
|
| 282485 |
+
"epoch": 1150.54,
|
| 282486 |
+
"learning_rate": 7.691873990306947e-06,
|
| 282487 |
+
"loss": 0.4053,
|
| 282488 |
+
"step": 143930
|
| 282489 |
+
},
|
| 282490 |
+
{
|
| 282491 |
+
"epoch": 1150.58,
|
| 282492 |
+
"learning_rate": 7.691793214862683e-06,
|
| 282493 |
+
"loss": 0.7631,
|
| 282494 |
+
"step": 143935
|
| 282495 |
+
},
|
| 282496 |
+
{
|
| 282497 |
+
"epoch": 1150.62,
|
| 282498 |
+
"learning_rate": 7.691712439418417e-06,
|
| 282499 |
+
"loss": 0.817,
|
| 282500 |
+
"step": 143940
|
| 282501 |
+
},
|
| 282502 |
+
{
|
| 282503 |
+
"epoch": 1150.66,
|
| 282504 |
+
"learning_rate": 7.691631663974153e-06,
|
| 282505 |
+
"loss": 0.2555,
|
| 282506 |
+
"step": 143945
|
| 282507 |
+
},
|
| 282508 |
+
{
|
| 282509 |
+
"epoch": 1150.7,
|
| 282510 |
+
"learning_rate": 7.691550888529887e-06,
|
| 282511 |
+
"loss": 0.2972,
|
| 282512 |
+
"step": 143950
|
| 282513 |
+
},
|
| 282514 |
+
{
|
| 282515 |
+
"epoch": 1150.74,
|
| 282516 |
+
"learning_rate": 7.691470113085623e-06,
|
| 282517 |
+
"loss": 0.3903,
|
| 282518 |
+
"step": 143955
|
| 282519 |
+
},
|
| 282520 |
+
{
|
| 282521 |
+
"epoch": 1150.78,
|
| 282522 |
+
"learning_rate": 7.691389337641357e-06,
|
| 282523 |
+
"loss": 0.7996,
|
| 282524 |
+
"step": 143960
|
| 282525 |
+
},
|
| 282526 |
+
{
|
| 282527 |
+
"epoch": 1150.82,
|
| 282528 |
+
"learning_rate": 7.691308562197093e-06,
|
| 282529 |
+
"loss": 0.8304,
|
| 282530 |
+
"step": 143965
|
| 282531 |
+
},
|
| 282532 |
+
{
|
| 282533 |
+
"epoch": 1150.86,
|
| 282534 |
+
"learning_rate": 7.691227786752827e-06,
|
| 282535 |
+
"loss": 0.2814,
|
| 282536 |
+
"step": 143970
|
| 282537 |
+
},
|
| 282538 |
+
{
|
| 282539 |
+
"epoch": 1150.9,
|
| 282540 |
+
"learning_rate": 7.691147011308563e-06,
|
| 282541 |
+
"loss": 0.2936,
|
| 282542 |
+
"step": 143975
|
| 282543 |
+
},
|
| 282544 |
+
{
|
| 282545 |
+
"epoch": 1150.94,
|
| 282546 |
+
"learning_rate": 7.691066235864297e-06,
|
| 282547 |
+
"loss": 0.3127,
|
| 282548 |
+
"step": 143980
|
| 282549 |
+
},
|
| 282550 |
+
{
|
| 282551 |
+
"epoch": 1150.98,
|
| 282552 |
+
"learning_rate": 7.690985460420033e-06,
|
| 282553 |
+
"loss": 0.7676,
|
| 282554 |
+
"step": 143985
|
| 282555 |
+
},
|
| 282556 |
+
{
|
| 282557 |
+
"epoch": 1151.0,
|
| 282558 |
+
"eval_loss": 0.37501341104507446,
|
| 282559 |
+
"eval_runtime": 40.5276,
|
| 282560 |
+
"eval_samples_per_second": 20.677,
|
| 282561 |
+
"eval_steps_per_second": 0.666,
|
| 282562 |
+
"eval_wer": 0.1780933362916944,
|
| 282563 |
+
"step": 143987
|
| 282564 |
+
},
|
| 282565 |
+
{
|
| 282566 |
+
"epoch": 1161.02,
|
| 282567 |
+
"learning_rate": 7.690904684975768e-06,
|
| 282568 |
+
"loss": 0.371,
|
| 282569 |
+
"step": 143990
|
| 282570 |
+
},
|
| 282571 |
+
{
|
| 282572 |
+
"epoch": 1161.06,
|
| 282573 |
+
"learning_rate": 7.690823909531503e-06,
|
| 282574 |
+
"loss": 0.2585,
|
| 282575 |
+
"step": 143995
|
| 282576 |
+
},
|
| 282577 |
+
{
|
| 282578 |
+
"epoch": 1161.1,
|
| 282579 |
+
"learning_rate": 7.690743134087238e-06,
|
| 282580 |
+
"loss": 0.2695,
|
| 282581 |
+
"step": 144000
|
| 282582 |
+
},
|
| 282583 |
+
{
|
| 282584 |
+
"epoch": 1161.14,
|
| 282585 |
+
"learning_rate": 7.690662358642972e-06,
|
| 282586 |
+
"loss": 0.3722,
|
| 282587 |
+
"step": 144005
|
| 282588 |
+
},
|
| 282589 |
+
{
|
| 282590 |
+
"epoch": 1161.18,
|
| 282591 |
+
"learning_rate": 7.690581583198708e-06,
|
| 282592 |
+
"loss": 0.6405,
|
| 282593 |
+
"step": 144010
|
| 282594 |
+
},
|
| 282595 |
+
{
|
| 282596 |
+
"epoch": 1161.22,
|
| 282597 |
+
"learning_rate": 7.690500807754442e-06,
|
| 282598 |
+
"loss": 0.7858,
|
| 282599 |
+
"step": 144015
|
| 282600 |
+
},
|
| 282601 |
+
{
|
| 282602 |
+
"epoch": 1161.27,
|
| 282603 |
+
"learning_rate": 7.690420032310178e-06,
|
| 282604 |
+
"loss": 0.2549,
|
| 282605 |
+
"step": 144020
|
| 282606 |
+
},
|
| 282607 |
+
{
|
| 282608 |
+
"epoch": 1161.31,
|
| 282609 |
+
"learning_rate": 7.690339256865912e-06,
|
| 282610 |
+
"loss": 0.2691,
|
| 282611 |
+
"step": 144025
|
| 282612 |
+
},
|
| 282613 |
+
{
|
| 282614 |
+
"epoch": 1161.35,
|
| 282615 |
+
"learning_rate": 7.690258481421648e-06,
|
| 282616 |
+
"loss": 0.3902,
|
| 282617 |
+
"step": 144030
|
| 282618 |
+
},
|
| 282619 |
+
{
|
| 282620 |
+
"epoch": 1161.39,
|
| 282621 |
+
"learning_rate": 7.690177705977382e-06,
|
| 282622 |
+
"loss": 0.7926,
|
| 282623 |
+
"step": 144035
|
| 282624 |
+
},
|
| 282625 |
+
{
|
| 282626 |
+
"epoch": 1161.43,
|
| 282627 |
+
"learning_rate": 7.690096930533118e-06,
|
| 282628 |
+
"loss": 0.8472,
|
| 282629 |
+
"step": 144040
|
| 282630 |
+
},
|
| 282631 |
+
{
|
| 282632 |
+
"epoch": 1161.47,
|
| 282633 |
+
"learning_rate": 7.690016155088854e-06,
|
| 282634 |
+
"loss": 0.2255,
|
| 282635 |
+
"step": 144045
|
| 282636 |
+
},
|
| 282637 |
+
{
|
| 282638 |
+
"epoch": 1161.51,
|
| 282639 |
+
"learning_rate": 7.689935379644588e-06,
|
| 282640 |
+
"loss": 0.2868,
|
| 282641 |
+
"step": 144050
|
| 282642 |
+
},
|
| 282643 |
+
{
|
| 282644 |
+
"epoch": 1161.55,
|
| 282645 |
+
"learning_rate": 7.689854604200324e-06,
|
| 282646 |
+
"loss": 0.3652,
|
| 282647 |
+
"step": 144055
|
| 282648 |
+
},
|
| 282649 |
+
{
|
| 282650 |
+
"epoch": 1161.59,
|
| 282651 |
+
"learning_rate": 7.689773828756058e-06,
|
| 282652 |
+
"loss": 0.7302,
|
| 282653 |
+
"step": 144060
|
| 282654 |
+
},
|
| 282655 |
+
{
|
| 282656 |
+
"epoch": 1161.63,
|
| 282657 |
+
"learning_rate": 7.689693053311794e-06,
|
| 282658 |
+
"loss": 0.8197,
|
| 282659 |
+
"step": 144065
|
| 282660 |
+
},
|
| 282661 |
+
{
|
| 282662 |
+
"epoch": 1161.67,
|
| 282663 |
+
"learning_rate": 7.689612277867528e-06,
|
| 282664 |
+
"loss": 0.3005,
|
| 282665 |
+
"step": 144070
|
| 282666 |
+
},
|
| 282667 |
+
{
|
| 282668 |
+
"epoch": 1161.71,
|
| 282669 |
+
"learning_rate": 7.689531502423264e-06,
|
| 282670 |
+
"loss": 0.3147,
|
| 282671 |
+
"step": 144075
|
| 282672 |
+
},
|
| 282673 |
+
{
|
| 282674 |
+
"epoch": 1161.75,
|
| 282675 |
+
"learning_rate": 7.689450726978998e-06,
|
| 282676 |
+
"loss": 0.3716,
|
| 282677 |
+
"step": 144080
|
| 282678 |
+
},
|
| 282679 |
+
{
|
| 282680 |
+
"epoch": 1161.79,
|
| 282681 |
+
"learning_rate": 7.689369951534734e-06,
|
| 282682 |
+
"loss": 0.6532,
|
| 282683 |
+
"step": 144085
|
| 282684 |
+
},
|
| 282685 |
+
{
|
| 282686 |
+
"epoch": 1161.83,
|
| 282687 |
+
"learning_rate": 7.689289176090468e-06,
|
| 282688 |
+
"loss": 0.7379,
|
| 282689 |
+
"step": 144090
|
| 282690 |
+
},
|
| 282691 |
+
{
|
| 282692 |
+
"epoch": 1161.87,
|
| 282693 |
+
"learning_rate": 7.689208400646204e-06,
|
| 282694 |
+
"loss": 0.3081,
|
| 282695 |
+
"step": 144095
|
| 282696 |
+
},
|
| 282697 |
+
{
|
| 282698 |
+
"epoch": 1161.91,
|
| 282699 |
+
"learning_rate": 7.68912762520194e-06,
|
| 282700 |
+
"loss": 0.3046,
|
| 282701 |
+
"step": 144100
|
| 282702 |
+
},
|
| 282703 |
+
{
|
| 282704 |
+
"epoch": 1161.95,
|
| 282705 |
+
"learning_rate": 7.689046849757674e-06,
|
| 282706 |
+
"loss": 0.4244,
|
| 282707 |
+
"step": 144105
|
| 282708 |
+
},
|
| 282709 |
+
{
|
| 282710 |
+
"epoch": 1161.99,
|
| 282711 |
+
"learning_rate": 7.68896607431341e-06,
|
| 282712 |
+
"loss": 0.8756,
|
| 282713 |
+
"step": 144110
|
| 282714 |
+
},
|
| 282715 |
+
{
|
| 282716 |
+
"epoch": 1162.0,
|
| 282717 |
+
"eval_loss": 0.3087240755558014,
|
| 282718 |
+
"eval_runtime": 40.87,
|
| 282719 |
+
"eval_samples_per_second": 20.504,
|
| 282720 |
+
"eval_steps_per_second": 0.661,
|
| 282721 |
+
"eval_wer": 0.17917888563049852,
|
| 282722 |
+
"step": 144111
|
| 282723 |
+
},
|
| 282724 |
+
{
|
| 282725 |
+
"epoch": 1162.03,
|
| 282726 |
+
"learning_rate": 7.688885298869144e-06,
|
| 282727 |
+
"loss": 0.288,
|
| 282728 |
+
"step": 144115
|
| 282729 |
+
},
|
| 282730 |
+
{
|
| 282731 |
+
"epoch": 1162.07,
|
| 282732 |
+
"learning_rate": 7.68880452342488e-06,
|
| 282733 |
+
"loss": 0.2308,
|
| 282734 |
+
"step": 144120
|
| 282735 |
+
},
|
| 282736 |
+
{
|
| 282737 |
+
"epoch": 1162.11,
|
| 282738 |
+
"learning_rate": 7.688723747980614e-06,
|
| 282739 |
+
"loss": 0.2779,
|
| 282740 |
+
"step": 144125
|
| 282741 |
+
},
|
| 282742 |
+
{
|
| 282743 |
+
"epoch": 1162.15,
|
| 282744 |
+
"learning_rate": 7.68864297253635e-06,
|
| 282745 |
+
"loss": 0.367,
|
| 282746 |
+
"step": 144130
|
| 282747 |
+
},
|
| 282748 |
+
{
|
| 282749 |
+
"epoch": 1162.19,
|
| 282750 |
+
"learning_rate": 7.688562197092084e-06,
|
| 282751 |
+
"loss": 0.8518,
|
| 282752 |
+
"step": 144135
|
| 282753 |
+
},
|
| 282754 |
+
{
|
| 282755 |
+
"epoch": 1162.23,
|
| 282756 |
+
"learning_rate": 7.68848142164782e-06,
|
| 282757 |
+
"loss": 0.5858,
|
| 282758 |
+
"step": 144140
|
| 282759 |
+
},
|
| 282760 |
+
{
|
| 282761 |
+
"epoch": 1162.27,
|
| 282762 |
+
"learning_rate": 7.688400646203554e-06,
|
| 282763 |
+
"loss": 0.2899,
|
| 282764 |
+
"step": 144145
|
| 282765 |
+
},
|
| 282766 |
+
{
|
| 282767 |
+
"epoch": 1162.31,
|
| 282768 |
+
"learning_rate": 7.68831987075929e-06,
|
| 282769 |
+
"loss": 0.2828,
|
| 282770 |
+
"step": 144150
|
| 282771 |
+
},
|
| 282772 |
+
{
|
| 282773 |
+
"epoch": 1162.35,
|
| 282774 |
+
"learning_rate": 7.688239095315024e-06,
|
| 282775 |
+
"loss": 0.3602,
|
| 282776 |
+
"step": 144155
|
| 282777 |
+
},
|
| 282778 |
+
{
|
| 282779 |
+
"epoch": 1162.4,
|
| 282780 |
+
"learning_rate": 7.68815831987076e-06,
|
| 282781 |
+
"loss": 0.8127,
|
| 282782 |
+
"step": 144160
|
| 282783 |
+
},
|
| 282784 |
+
{
|
| 282785 |
+
"epoch": 1162.44,
|
| 282786 |
+
"learning_rate": 7.688077544426495e-06,
|
| 282787 |
+
"loss": 0.6768,
|
| 282788 |
+
"step": 144165
|
| 282789 |
+
},
|
| 282790 |
+
{
|
| 282791 |
+
"epoch": 1162.48,
|
| 282792 |
+
"learning_rate": 7.68799676898223e-06,
|
| 282793 |
+
"loss": 0.2627,
|
| 282794 |
+
"step": 144170
|
| 282795 |
+
},
|
| 282796 |
+
{
|
| 282797 |
+
"epoch": 1162.52,
|
| 282798 |
+
"learning_rate": 7.687915993537965e-06,
|
| 282799 |
+
"loss": 0.2619,
|
| 282800 |
+
"step": 144175
|
| 282801 |
+
},
|
| 282802 |
+
{
|
| 282803 |
+
"epoch": 1162.56,
|
| 282804 |
+
"learning_rate": 7.6878352180937e-06,
|
| 282805 |
+
"loss": 0.3976,
|
| 282806 |
+
"step": 144180
|
| 282807 |
+
},
|
| 282808 |
+
{
|
| 282809 |
+
"epoch": 1162.6,
|
| 282810 |
+
"learning_rate": 7.687754442649435e-06,
|
| 282811 |
+
"loss": 0.8505,
|
| 282812 |
+
"step": 144185
|
| 282813 |
+
},
|
| 282814 |
+
{
|
| 282815 |
+
"epoch": 1162.64,
|
| 282816 |
+
"learning_rate": 7.68767366720517e-06,
|
| 282817 |
+
"loss": 0.6324,
|
| 282818 |
+
"step": 144190
|
| 282819 |
+
},
|
| 282820 |
+
{
|
| 282821 |
+
"epoch": 1162.68,
|
| 282822 |
+
"learning_rate": 7.687592891760905e-06,
|
| 282823 |
+
"loss": 0.2943,
|
| 282824 |
+
"step": 144195
|
| 282825 |
+
},
|
| 282826 |
+
{
|
| 282827 |
+
"epoch": 1162.72,
|
| 282828 |
+
"learning_rate": 7.68751211631664e-06,
|
| 282829 |
+
"loss": 0.4165,
|
| 282830 |
+
"step": 144200
|
| 282831 |
+
},
|
| 282832 |
+
{
|
| 282833 |
+
"epoch": 1162.76,
|
| 282834 |
+
"learning_rate": 7.687431340872375e-06,
|
| 282835 |
+
"loss": 0.4133,
|
| 282836 |
+
"step": 144205
|
| 282837 |
+
},
|
| 282838 |
+
{
|
| 282839 |
+
"epoch": 1162.8,
|
| 282840 |
+
"learning_rate": 7.68735056542811e-06,
|
| 282841 |
+
"loss": 0.8709,
|
| 282842 |
+
"step": 144210
|
| 282843 |
+
},
|
| 282844 |
+
{
|
| 282845 |
+
"epoch": 1162.84,
|
| 282846 |
+
"learning_rate": 7.687269789983845e-06,
|
| 282847 |
+
"loss": 0.5874,
|
| 282848 |
+
"step": 144215
|
| 282849 |
+
},
|
| 282850 |
+
{
|
| 282851 |
+
"epoch": 1162.88,
|
| 282852 |
+
"learning_rate": 7.687189014539581e-06,
|
| 282853 |
+
"loss": 0.2829,
|
| 282854 |
+
"step": 144220
|
| 282855 |
+
},
|
| 282856 |
+
{
|
| 282857 |
+
"epoch": 1162.92,
|
| 282858 |
+
"learning_rate": 7.687108239095315e-06,
|
| 282859 |
+
"loss": 0.2959,
|
| 282860 |
+
"step": 144225
|
| 282861 |
+
},
|
| 282862 |
+
{
|
| 282863 |
+
"epoch": 1162.96,
|
| 282864 |
+
"learning_rate": 7.687027463651051e-06,
|
| 282865 |
+
"loss": 0.4034,
|
| 282866 |
+
"step": 144230
|
| 282867 |
+
},
|
| 282868 |
+
{
|
| 282869 |
+
"epoch": 1163.0,
|
| 282870 |
+
"learning_rate": 7.686946688206785e-06,
|
| 282871 |
+
"loss": 1.1425,
|
| 282872 |
+
"step": 144235
|
| 282873 |
+
},
|
| 282874 |
+
{
|
| 282875 |
+
"epoch": 1163.0,
|
| 282876 |
+
"eval_loss": 0.3543190360069275,
|
| 282877 |
+
"eval_runtime": 42.4164,
|
| 282878 |
+
"eval_samples_per_second": 19.756,
|
| 282879 |
+
"eval_steps_per_second": 0.637,
|
| 282880 |
+
"eval_wer": 0.16862858380876725,
|
| 282881 |
+
"step": 144235
|
| 282882 |
+
},
|
| 282883 |
+
{
|
| 282884 |
+
"epoch": 1163.04,
|
| 282885 |
+
"learning_rate": 7.686865912762521e-06,
|
| 282886 |
+
"loss": 0.3994,
|
| 282887 |
+
"step": 144240
|
| 282888 |
+
},
|
| 282889 |
+
{
|
| 282890 |
+
"epoch": 1163.08,
|
| 282891 |
+
"learning_rate": 7.686785137318255e-06,
|
| 282892 |
+
"loss": 0.2606,
|
| 282893 |
+
"step": 144245
|
| 282894 |
+
},
|
| 282895 |
+
{
|
| 282896 |
+
"epoch": 1163.12,
|
| 282897 |
+
"learning_rate": 7.686704361873991e-06,
|
| 282898 |
+
"loss": 0.2929,
|
| 282899 |
+
"step": 144250
|
| 282900 |
+
},
|
| 282901 |
+
{
|
| 282902 |
+
"epoch": 1163.16,
|
| 282903 |
+
"learning_rate": 7.686623586429725e-06,
|
| 282904 |
+
"loss": 0.5009,
|
| 282905 |
+
"step": 144255
|
| 282906 |
+
},
|
| 282907 |
+
{
|
| 282908 |
+
"epoch": 1163.2,
|
| 282909 |
+
"learning_rate": 7.686542810985461e-06,
|
| 282910 |
+
"loss": 1.1961,
|
| 282911 |
+
"step": 144260
|
| 282912 |
+
},
|
| 282913 |
+
{
|
| 282914 |
+
"epoch": 1163.24,
|
| 282915 |
+
"learning_rate": 7.686462035541195e-06,
|
| 282916 |
+
"loss": 0.3198,
|
| 282917 |
+
"step": 144265
|
| 282918 |
+
},
|
| 282919 |
+
{
|
| 282920 |
+
"epoch": 1163.28,
|
| 282921 |
+
"learning_rate": 7.686381260096931e-06,
|
| 282922 |
+
"loss": 0.2407,
|
| 282923 |
+
"step": 144270
|
| 282924 |
+
},
|
| 282925 |
+
{
|
| 282926 |
+
"epoch": 1163.32,
|
| 282927 |
+
"learning_rate": 7.686300484652667e-06,
|
| 282928 |
+
"loss": 0.3547,
|
| 282929 |
+
"step": 144275
|
| 282930 |
+
},
|
| 282931 |
+
{
|
| 282932 |
+
"epoch": 1163.36,
|
| 282933 |
+
"learning_rate": 7.686219709208401e-06,
|
| 282934 |
+
"loss": 0.5029,
|
| 282935 |
+
"step": 144280
|
| 282936 |
+
},
|
| 282937 |
+
{
|
| 282938 |
+
"epoch": 1163.4,
|
| 282939 |
+
"learning_rate": 7.686138933764137e-06,
|
| 282940 |
+
"loss": 1.2116,
|
| 282941 |
+
"step": 144285
|
| 282942 |
+
},
|
| 282943 |
+
{
|
| 282944 |
+
"epoch": 1163.44,
|
| 282945 |
+
"learning_rate": 7.686058158319871e-06,
|
| 282946 |
+
"loss": 0.3207,
|
| 282947 |
+
"step": 144290
|
| 282948 |
+
},
|
| 282949 |
+
{
|
| 282950 |
+
"epoch": 1163.48,
|
| 282951 |
+
"learning_rate": 7.685977382875607e-06,
|
| 282952 |
+
"loss": 0.2658,
|
| 282953 |
+
"step": 144295
|
| 282954 |
+
},
|
| 282955 |
+
{
|
| 282956 |
+
"epoch": 1163.52,
|
| 282957 |
+
"learning_rate": 7.685896607431341e-06,
|
| 282958 |
+
"loss": 0.4059,
|
| 282959 |
+
"step": 144300
|
| 282960 |
+
},
|
| 282961 |
+
{
|
| 282962 |
+
"epoch": 1163.56,
|
| 282963 |
+
"learning_rate": 7.685815831987077e-06,
|
| 282964 |
+
"loss": 0.4165,
|
| 282965 |
+
"step": 144305
|
| 282966 |
+
},
|
| 282967 |
+
{
|
| 282968 |
+
"epoch": 1163.6,
|
| 282969 |
+
"learning_rate": 7.685735056542811e-06,
|
| 282970 |
+
"loss": 1.1251,
|
| 282971 |
+
"step": 144310
|
| 282972 |
+
},
|
| 282973 |
+
{
|
| 282974 |
+
"epoch": 1163.64,
|
| 282975 |
+
"learning_rate": 7.685654281098547e-06,
|
| 282976 |
+
"loss": 0.2963,
|
| 282977 |
+
"step": 144315
|
| 282978 |
+
},
|
| 282979 |
+
{
|
| 282980 |
+
"epoch": 1163.68,
|
| 282981 |
+
"learning_rate": 7.685573505654281e-06,
|
| 282982 |
+
"loss": 0.3359,
|
| 282983 |
+
"step": 144320
|
| 282984 |
+
},
|
| 282985 |
+
{
|
| 282986 |
+
"epoch": 1163.72,
|
| 282987 |
+
"learning_rate": 7.685492730210017e-06,
|
| 282988 |
+
"loss": 0.3135,
|
| 282989 |
+
"step": 144325
|
| 282990 |
+
},
|
| 282991 |
+
{
|
| 282992 |
+
"epoch": 1163.76,
|
| 282993 |
+
"learning_rate": 7.685411954765751e-06,
|
| 282994 |
+
"loss": 0.4269,
|
| 282995 |
+
"step": 144330
|
| 282996 |
+
},
|
| 282997 |
+
{
|
| 282998 |
+
"epoch": 1163.8,
|
| 282999 |
+
"learning_rate": 7.685331179321487e-06,
|
| 283000 |
+
"loss": 1.1198,
|
| 283001 |
+
"step": 144335
|
| 283002 |
+
},
|
| 283003 |
+
{
|
| 283004 |
+
"epoch": 1163.84,
|
| 283005 |
+
"learning_rate": 7.685250403877223e-06,
|
| 283006 |
+
"loss": 0.3009,
|
| 283007 |
+
"step": 144340
|
| 283008 |
+
},
|
| 283009 |
+
{
|
| 283010 |
+
"epoch": 1163.88,
|
| 283011 |
+
"learning_rate": 7.685169628432957e-06,
|
| 283012 |
+
"loss": 0.2363,
|
| 283013 |
+
"step": 144345
|
| 283014 |
+
},
|
| 283015 |
+
{
|
| 283016 |
+
"epoch": 1163.92,
|
| 283017 |
+
"learning_rate": 7.685088852988693e-06,
|
| 283018 |
+
"loss": 0.286,
|
| 283019 |
+
"step": 144350
|
| 283020 |
+
},
|
| 283021 |
+
{
|
| 283022 |
+
"epoch": 1163.96,
|
| 283023 |
+
"learning_rate": 7.685008077544427e-06,
|
| 283024 |
+
"loss": 0.6232,
|
| 283025 |
+
"step": 144355
|
| 283026 |
+
},
|
| 283027 |
+
{
|
| 283028 |
+
"epoch": 1164.0,
|
| 283029 |
+
"eval_loss": 0.46291494369506836,
|
| 283030 |
+
"eval_runtime": 40.1634,
|
| 283031 |
+
"eval_samples_per_second": 20.84,
|
| 283032 |
+
"eval_steps_per_second": 0.672,
|
| 283033 |
+
"eval_wer": 0.1854970934565509,
|
| 283034 |
+
"step": 144359
|
| 283035 |
+
},
|
| 283036 |
+
{
|
| 283037 |
+
"epoch": 1164.01,
|
| 283038 |
+
"learning_rate": 7.684927302100163e-06,
|
| 283039 |
+
"loss": 0.39,
|
| 283040 |
+
"step": 144360
|
| 283041 |
+
},
|
| 283042 |
+
{
|
| 283043 |
+
"epoch": 1164.05,
|
| 283044 |
+
"learning_rate": 7.684846526655897e-06,
|
| 283045 |
+
"loss": 0.2847,
|
| 283046 |
+
"step": 144365
|
| 283047 |
+
},
|
| 283048 |
+
{
|
| 283049 |
+
"epoch": 1164.09,
|
| 283050 |
+
"learning_rate": 7.684765751211633e-06,
|
| 283051 |
+
"loss": 0.294,
|
| 283052 |
+
"step": 144370
|
| 283053 |
+
},
|
| 283054 |
+
{
|
| 283055 |
+
"epoch": 1164.13,
|
| 283056 |
+
"learning_rate": 7.684684975767367e-06,
|
| 283057 |
+
"loss": 0.3235,
|
| 283058 |
+
"step": 144375
|
| 283059 |
+
},
|
| 283060 |
+
{
|
| 283061 |
+
"epoch": 1164.17,
|
| 283062 |
+
"learning_rate": 7.684604200323102e-06,
|
| 283063 |
+
"loss": 0.4361,
|
| 283064 |
+
"step": 144380
|
| 283065 |
+
},
|
| 283066 |
+
{
|
| 283067 |
+
"epoch": 1164.21,
|
| 283068 |
+
"learning_rate": 7.684523424878837e-06,
|
| 283069 |
+
"loss": 1.1416,
|
| 283070 |
+
"step": 144385
|
| 283071 |
+
},
|
| 283072 |
+
{
|
| 283073 |
+
"epoch": 1164.25,
|
| 283074 |
+
"learning_rate": 7.684442649434572e-06,
|
| 283075 |
+
"loss": 0.3318,
|
| 283076 |
+
"step": 144390
|
| 283077 |
+
},
|
| 283078 |
+
{
|
| 283079 |
+
"epoch": 1164.29,
|
| 283080 |
+
"learning_rate": 7.684361873990308e-06,
|
| 283081 |
+
"loss": 0.2674,
|
| 283082 |
+
"step": 144395
|
| 283083 |
+
},
|
| 283084 |
+
{
|
| 283085 |
+
"epoch": 1164.33,
|
| 283086 |
+
"learning_rate": 7.684281098546042e-06,
|
| 283087 |
+
"loss": 0.295,
|
| 283088 |
+
"step": 144400
|
| 283089 |
+
},
|
| 283090 |
+
{
|
| 283091 |
+
"epoch": 1164.37,
|
| 283092 |
+
"learning_rate": 7.684200323101778e-06,
|
| 283093 |
+
"loss": 0.4393,
|
| 283094 |
+
"step": 144405
|
| 283095 |
+
},
|
| 283096 |
+
{
|
| 283097 |
+
"epoch": 1164.41,
|
| 283098 |
+
"learning_rate": 7.684119547657512e-06,
|
| 283099 |
+
"loss": 1.0585,
|
| 283100 |
+
"step": 144410
|
| 283101 |
+
},
|
| 283102 |
+
{
|
| 283103 |
+
"epoch": 1164.45,
|
| 283104 |
+
"learning_rate": 7.684038772213248e-06,
|
| 283105 |
+
"loss": 0.312,
|
| 283106 |
+
"step": 144415
|
| 283107 |
+
},
|
| 283108 |
+
{
|
| 283109 |
+
"epoch": 1164.49,
|
| 283110 |
+
"learning_rate": 7.683957996768982e-06,
|
| 283111 |
+
"loss": 0.2595,
|
| 283112 |
+
"step": 144420
|
| 283113 |
+
},
|
| 283114 |
+
{
|
| 283115 |
+
"epoch": 1164.53,
|
| 283116 |
+
"learning_rate": 7.683877221324718e-06,
|
| 283117 |
+
"loss": 0.2933,
|
| 283118 |
+
"step": 144425
|
| 283119 |
+
},
|
| 283120 |
+
{
|
| 283121 |
+
"epoch": 1164.57,
|
| 283122 |
+
"learning_rate": 7.683796445880452e-06,
|
| 283123 |
+
"loss": 0.515,
|
| 283124 |
+
"step": 144430
|
| 283125 |
+
},
|
| 283126 |
+
{
|
| 283127 |
+
"epoch": 1164.61,
|
| 283128 |
+
"learning_rate": 7.683715670436188e-06,
|
| 283129 |
+
"loss": 0.9295,
|
| 283130 |
+
"step": 144435
|
| 283131 |
+
},
|
| 283132 |
+
{
|
| 283133 |
+
"epoch": 1164.65,
|
| 283134 |
+
"learning_rate": 7.683634894991922e-06,
|
| 283135 |
+
"loss": 0.2886,
|
| 283136 |
+
"step": 144440
|
| 283137 |
+
},
|
| 283138 |
+
{
|
| 283139 |
+
"epoch": 1164.69,
|
| 283140 |
+
"learning_rate": 7.683554119547658e-06,
|
| 283141 |
+
"loss": 0.3172,
|
| 283142 |
+
"step": 144445
|
| 283143 |
+
},
|
| 283144 |
+
{
|
| 283145 |
+
"epoch": 1164.73,
|
| 283146 |
+
"learning_rate": 7.683473344103394e-06,
|
| 283147 |
+
"loss": 0.354,
|
| 283148 |
+
"step": 144450
|
| 283149 |
+
},
|
| 283150 |
+
{
|
| 283151 |
+
"epoch": 1164.77,
|
| 283152 |
+
"learning_rate": 7.683392568659128e-06,
|
| 283153 |
+
"loss": 0.4889,
|
| 283154 |
+
"step": 144455
|
| 283155 |
+
},
|
| 283156 |
+
{
|
| 283157 |
+
"epoch": 1164.81,
|
| 283158 |
+
"learning_rate": 7.683311793214864e-06,
|
| 283159 |
+
"loss": 0.9191,
|
| 283160 |
+
"step": 144460
|
| 283161 |
+
},
|
| 283162 |
+
{
|
| 283163 |
+
"epoch": 1164.85,
|
| 283164 |
+
"learning_rate": 7.683231017770598e-06,
|
| 283165 |
+
"loss": 0.3006,
|
| 283166 |
+
"step": 144465
|
| 283167 |
+
},
|
| 283168 |
+
{
|
| 283169 |
+
"epoch": 1164.89,
|
| 283170 |
+
"learning_rate": 7.683150242326334e-06,
|
| 283171 |
+
"loss": 0.309,
|
| 283172 |
+
"step": 144470
|
| 283173 |
+
},
|
| 283174 |
+
{
|
| 283175 |
+
"epoch": 1164.93,
|
| 283176 |
+
"learning_rate": 7.683069466882068e-06,
|
| 283177 |
+
"loss": 1.1332,
|
| 283178 |
+
"step": 144475
|
| 283179 |
+
},
|
| 283180 |
+
{
|
| 283181 |
+
"epoch": 1164.97,
|
| 283182 |
+
"learning_rate": 7.682988691437804e-06,
|
| 283183 |
+
"loss": 0.5342,
|
| 283184 |
+
"step": 144480
|
| 283185 |
+
},
|
| 283186 |
+
{
|
| 283187 |
+
"epoch": 1165.0,
|
| 283188 |
+
"eval_loss": 0.3511020839214325,
|
| 283189 |
+
"eval_runtime": 41.8487,
|
| 283190 |
+
"eval_samples_per_second": 20.001,
|
| 283191 |
+
"eval_steps_per_second": 0.645,
|
| 283192 |
+
"eval_wer": 0.1758169934640523,
|
| 283193 |
+
"step": 144483
|
| 283194 |
+
},
|
| 283195 |
+
{
|
| 283196 |
+
"epoch": 1165.02,
|
| 283197 |
+
"learning_rate": 7.682907915993538e-06,
|
| 283198 |
+
"loss": 0.2721,
|
| 283199 |
+
"step": 144485
|
| 283200 |
+
},
|
| 283201 |
+
{
|
| 283202 |
+
"epoch": 1165.06,
|
| 283203 |
+
"learning_rate": 7.682827140549274e-06,
|
| 283204 |
+
"loss": 0.2887,
|
| 283205 |
+
"step": 144490
|
| 283206 |
+
},
|
| 283207 |
+
{
|
| 283208 |
+
"epoch": 1165.1,
|
| 283209 |
+
"learning_rate": 7.682746365105008e-06,
|
| 283210 |
+
"loss": 0.339,
|
| 283211 |
+
"step": 144495
|
| 283212 |
+
},
|
| 283213 |
+
{
|
| 283214 |
+
"epoch": 1165.14,
|
| 283215 |
+
"learning_rate": 7.682665589660744e-06,
|
| 283216 |
+
"loss": 0.2786,
|
| 283217 |
+
"step": 144500
|
| 283218 |
+
},
|
| 283219 |
+
{
|
| 283220 |
+
"epoch": 1165.18,
|
| 283221 |
+
"learning_rate": 7.682584814216478e-06,
|
| 283222 |
+
"loss": 0.5607,
|
| 283223 |
+
"step": 144505
|
| 283224 |
+
},
|
| 283225 |
+
{
|
| 283226 |
+
"epoch": 1165.22,
|
| 283227 |
+
"learning_rate": 7.682504038772214e-06,
|
| 283228 |
+
"loss": 0.9914,
|
| 283229 |
+
"step": 144510
|
| 283230 |
+
},
|
| 283231 |
+
{
|
| 283232 |
+
"epoch": 1165.26,
|
| 283233 |
+
"learning_rate": 7.68242326332795e-06,
|
| 283234 |
+
"loss": 0.2515,
|
| 283235 |
+
"step": 144515
|
| 283236 |
+
},
|
| 283237 |
+
{
|
| 283238 |
+
"epoch": 1165.3,
|
| 283239 |
+
"learning_rate": 7.682342487883684e-06,
|
| 283240 |
+
"loss": 0.3679,
|
| 283241 |
+
"step": 144520
|
| 283242 |
+
},
|
| 283243 |
+
{
|
| 283244 |
+
"epoch": 1165.34,
|
| 283245 |
+
"learning_rate": 7.68226171243942e-06,
|
| 283246 |
+
"loss": 0.3706,
|
| 283247 |
+
"step": 144525
|
| 283248 |
+
},
|
| 283249 |
+
{
|
| 283250 |
+
"epoch": 1165.38,
|
| 283251 |
+
"learning_rate": 7.682180936995154e-06,
|
| 283252 |
+
"loss": 0.5414,
|
| 283253 |
+
"step": 144530
|
| 283254 |
+
},
|
| 283255 |
+
{
|
| 283256 |
+
"epoch": 1165.42,
|
| 283257 |
+
"learning_rate": 7.68210016155089e-06,
|
| 283258 |
+
"loss": 1.0052,
|
| 283259 |
+
"step": 144535
|
| 283260 |
+
},
|
| 283261 |
+
{
|
| 283262 |
+
"epoch": 1165.46,
|
| 283263 |
+
"learning_rate": 7.682019386106624e-06,
|
| 283264 |
+
"loss": 0.2638,
|
| 283265 |
+
"step": 144540
|
| 283266 |
+
},
|
| 283267 |
+
{
|
| 283268 |
+
"epoch": 1165.5,
|
| 283269 |
+
"learning_rate": 7.68193861066236e-06,
|
| 283270 |
+
"loss": 0.2948,
|
| 283271 |
+
"step": 144545
|
| 283272 |
+
},
|
| 283273 |
+
{
|
| 283274 |
+
"epoch": 1165.54,
|
| 283275 |
+
"learning_rate": 7.681857835218094e-06,
|
| 283276 |
+
"loss": 0.4818,
|
| 283277 |
+
"step": 144550
|
| 283278 |
+
},
|
| 283279 |
+
{
|
| 283280 |
+
"epoch": 1165.58,
|
| 283281 |
+
"learning_rate": 7.68177705977383e-06,
|
| 283282 |
+
"loss": 0.6016,
|
| 283283 |
+
"step": 144555
|
| 283284 |
+
},
|
| 283285 |
+
{
|
| 283286 |
+
"epoch": 1165.62,
|
| 283287 |
+
"learning_rate": 7.681696284329564e-06,
|
| 283288 |
+
"loss": 0.8445,
|
| 283289 |
+
"step": 144560
|
| 283290 |
+
},
|
| 283291 |
+
{
|
| 283292 |
+
"epoch": 1165.66,
|
| 283293 |
+
"learning_rate": 7.6816155088853e-06,
|
| 283294 |
+
"loss": 0.246,
|
| 283295 |
+
"step": 144565
|
| 283296 |
+
},
|
| 283297 |
+
{
|
| 283298 |
+
"epoch": 1165.7,
|
| 283299 |
+
"learning_rate": 7.681534733441035e-06,
|
| 283300 |
+
"loss": 0.2843,
|
| 283301 |
+
"step": 144570
|
| 283302 |
+
},
|
| 283303 |
+
{
|
| 283304 |
+
"epoch": 1165.74,
|
| 283305 |
+
"learning_rate": 7.68145395799677e-06,
|
| 283306 |
+
"loss": 0.3326,
|
| 283307 |
+
"step": 144575
|
| 283308 |
+
},
|
| 283309 |
+
{
|
| 283310 |
+
"epoch": 1165.78,
|
| 283311 |
+
"learning_rate": 7.681373182552505e-06,
|
| 283312 |
+
"loss": 0.5409,
|
| 283313 |
+
"step": 144580
|
| 283314 |
+
},
|
| 283315 |
+
{
|
| 283316 |
+
"epoch": 1165.82,
|
| 283317 |
+
"learning_rate": 7.68129240710824e-06,
|
| 283318 |
+
"loss": 0.9051,
|
| 283319 |
+
"step": 144585
|
| 283320 |
+
},
|
| 283321 |
+
{
|
| 283322 |
+
"epoch": 1165.86,
|
| 283323 |
+
"learning_rate": 7.681211631663975e-06,
|
| 283324 |
+
"loss": 0.2603,
|
| 283325 |
+
"step": 144590
|
| 283326 |
+
},
|
| 283327 |
+
{
|
| 283328 |
+
"epoch": 1165.9,
|
| 283329 |
+
"learning_rate": 7.68113085621971e-06,
|
| 283330 |
+
"loss": 0.2964,
|
| 283331 |
+
"step": 144595
|
| 283332 |
+
},
|
| 283333 |
+
{
|
| 283334 |
+
"epoch": 1165.94,
|
| 283335 |
+
"learning_rate": 7.681050080775445e-06,
|
| 283336 |
+
"loss": 0.368,
|
| 283337 |
+
"step": 144600
|
| 283338 |
+
},
|
| 283339 |
+
{
|
| 283340 |
+
"epoch": 1165.98,
|
| 283341 |
+
"learning_rate": 7.68096930533118e-06,
|
| 283342 |
+
"loss": 0.6023,
|
| 283343 |
+
"step": 144605
|
| 283344 |
+
},
|
| 283345 |
+
{
|
| 283346 |
+
"epoch": 1166.0,
|
| 283347 |
+
"eval_loss": 0.3452383875846863,
|
| 283348 |
+
"eval_runtime": 39.7586,
|
| 283349 |
+
"eval_samples_per_second": 21.052,
|
| 283350 |
+
"eval_steps_per_second": 0.679,
|
| 283351 |
+
"eval_wer": 0.16790141568253503,
|
| 283352 |
+
"step": 144607
|
| 283353 |
}
|
| 283354 |
],
|
| 283355 |
+
"max_steps": 620000,
|
| 283356 |
"num_train_epochs": 5000,
|
| 283357 |
+
"total_flos": 4.0692196383491044e+20,
|
| 283358 |
"trial_name": null,
|
| 283359 |
"trial_params": null
|
| 283360 |
}
|
model-bin/finetune/base/{checkpoint-143987 β checkpoint-144607}/training_args.bin
RENAMED
|
File without changes
|
model-bin/finetune/base/log/1630221559.36492/events.out.tfevents.1630221559.cc93b136ebf5.1086.79
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0594dd42829acb9ee7fbfeb17ecf80dd36a0a8b3e97683233a8b0f49603801ee
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1630221982.8780048/events.out.tfevents.1630221982.cc93b136ebf5.1086.81
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c3e657aeef83b9573216be1aceca087ce842ca72fbeadafe924e4f96506e1ca
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1630222527.1516037/events.out.tfevents.1630222527.cc93b136ebf5.1086.83
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9a067a10447e81e33307ef999d3bc2f7e95445f624523607e8e9d9741846531
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1630222956.4361434/events.out.tfevents.1630222956.cc93b136ebf5.1086.85
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:932bd83b9ffa2ed5a1d58e104660fe88b14386c61cd8885cadf6380aa3a145a4
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1630223389.0244238/events.out.tfevents.1630223389.cc93b136ebf5.1086.87
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd5406d83da463522448dfda79c12603cd5d5880e1dd7fe071824ad74b55a5c8
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/events.out.tfevents.1630221559.cc93b136ebf5.1086.78
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:845a7c908d4872385b63f8632701506415863cbff59cad99ffa93372b2f20b82
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1630221982.cc93b136ebf5.1086.80
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f7898f70a8ec9a0508b2cfdfca03d3f97f77543e7415df9cc4a3a5c9644779a
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1630222527.cc93b136ebf5.1086.82
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd8419d69e4cf0449ff7741cccb09535ccbd4b812bb52ba093ac0da70808e3b3
|
| 3 |
+
size 8462
|
model-bin/finetune/base/log/events.out.tfevents.1630222956.cc93b136ebf5.1086.84
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cee186373891285cfab5d086521c277aa2cbbcfefb8312fbe0dd58b25b85374c
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1630223389.cc93b136ebf5.1086.86
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70b22297e197af0d1ead17a12eef94f896c86b9acfac9287d475155a3d4c4b46
|
| 3 |
+
size 8622
|