"auto-commit"
Browse files- model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/config.json +0 -0
- model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/optimizer.pt +1 -1
- model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/preprocessor_config.json +0 -0
- model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/pytorch_model.bin +1 -1
- model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/rng_state.pth +2 -2
- model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/scaler.pt +1 -1
- model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/scheduler.pt +1 -1
- model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/trainer_state.json +798 -3
- model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/training_args.bin +0 -0
- model-bin/finetune/base/log/1629998008.864934/events.out.tfevents.1629998008.8e89bd551565.924.261 +3 -0
- model-bin/finetune/base/log/1629998442.9438877/events.out.tfevents.1629998442.8e89bd551565.924.263 +3 -0
- model-bin/finetune/base/log/1629998872.4937503/events.out.tfevents.1629998872.8e89bd551565.924.265 +3 -0
- model-bin/finetune/base/log/1629999311.0902207/events.out.tfevents.1629999311.8e89bd551565.924.267 +3 -0
- model-bin/finetune/base/log/1629999744.3670874/events.out.tfevents.1629999744.8e89bd551565.924.269 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629998008.8e89bd551565.924.260 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629998442.8e89bd551565.924.262 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629998871.8e89bd551565.924.264 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629999311.8e89bd551565.924.266 +3 -0
- model-bin/finetune/base/log/events.out.tfevents.1629999744.8e89bd551565.924.268 +3 -0
model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/config.json
RENAMED
|
File without changes
|
model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/optimizer.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 722165393
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c6220178bfd9a2ecbe7067fc948b5353d428482f8f26accac8861c3e9800d32
|
| 3 |
size 722165393
|
model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/preprocessor_config.json
RENAMED
|
File without changes
|
model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/pytorch_model.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 377909911
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f9bebc6e429a7e01197322630d7b9a4d5cb32f48f0e08d480a96711d75f11a0
|
| 3 |
size 377909911
|
model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/rng_state.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed7c458ba4e8c089255b463a55fa305033c08d9a27f7f2dc8398c9cf6b013d7f
|
| 3 |
+
size 14503
|
model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/scaler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4d8b3e274ed10dcad9214825576b64bd9dd37a33a407546a15c5b9c3fe4a52f
|
| 3 |
size 559
|
model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/scheduler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9fa8d2961bde5cd0d43fc1e4bbb9455526c28df5ede72a8e3484d6b03a4beaf1
|
| 3 |
size 623
|
model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/trainer_state.json
RENAMED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.1743826049391605,
|
| 3 |
"best_model_checkpoint": "./model-bin/finetune/base/checkpoint-101551",
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -232368,11 +232368,806 @@
|
|
| 232368 |
"eval_steps_per_second": 0.68,
|
| 232369 |
"eval_wer": 0.18273625239569513,
|
| 232370 |
"step": 104539
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232371 |
}
|
| 232372 |
],
|
| 232373 |
"max_steps": 620000,
|
| 232374 |
"num_train_epochs": 5000,
|
| 232375 |
-
"total_flos": 2.
|
| 232376 |
"trial_name": null,
|
| 232377 |
"trial_params": null
|
| 232378 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.1743826049391605,
|
| 3 |
"best_model_checkpoint": "./model-bin/finetune/base/checkpoint-101551",
|
| 4 |
+
"epoch": 847.995983935743,
|
| 5 |
+
"global_step": 105160,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 232368 |
"eval_steps_per_second": 0.68,
|
| 232369 |
"eval_wer": 0.18273625239569513,
|
| 232370 |
"step": 104539
|
| 232371 |
+
},
|
| 232372 |
+
{
|
| 232373 |
+
"epoch": 843.01,
|
| 232374 |
+
"learning_rate": 8.34136217948718e-06,
|
| 232375 |
+
"loss": 0.4295,
|
| 232376 |
+
"step": 104540
|
| 232377 |
+
},
|
| 232378 |
+
{
|
| 232379 |
+
"epoch": 843.05,
|
| 232380 |
+
"learning_rate": 8.341282051282053e-06,
|
| 232381 |
+
"loss": 0.2951,
|
| 232382 |
+
"step": 104545
|
| 232383 |
+
},
|
| 232384 |
+
{
|
| 232385 |
+
"epoch": 843.09,
|
| 232386 |
+
"learning_rate": 8.341201923076924e-06,
|
| 232387 |
+
"loss": 0.284,
|
| 232388 |
+
"step": 104550
|
| 232389 |
+
},
|
| 232390 |
+
{
|
| 232391 |
+
"epoch": 843.13,
|
| 232392 |
+
"learning_rate": 8.341121794871796e-06,
|
| 232393 |
+
"loss": 0.3668,
|
| 232394 |
+
"step": 104555
|
| 232395 |
+
},
|
| 232396 |
+
{
|
| 232397 |
+
"epoch": 843.17,
|
| 232398 |
+
"learning_rate": 8.341041666666667e-06,
|
| 232399 |
+
"loss": 0.5648,
|
| 232400 |
+
"step": 104560
|
| 232401 |
+
},
|
| 232402 |
+
{
|
| 232403 |
+
"epoch": 843.21,
|
| 232404 |
+
"learning_rate": 8.34096153846154e-06,
|
| 232405 |
+
"loss": 1.1357,
|
| 232406 |
+
"step": 104565
|
| 232407 |
+
},
|
| 232408 |
+
{
|
| 232409 |
+
"epoch": 843.25,
|
| 232410 |
+
"learning_rate": 8.340881410256411e-06,
|
| 232411 |
+
"loss": 0.263,
|
| 232412 |
+
"step": 104570
|
| 232413 |
+
},
|
| 232414 |
+
{
|
| 232415 |
+
"epoch": 843.29,
|
| 232416 |
+
"learning_rate": 8.340801282051283e-06,
|
| 232417 |
+
"loss": 0.2763,
|
| 232418 |
+
"step": 104575
|
| 232419 |
+
},
|
| 232420 |
+
{
|
| 232421 |
+
"epoch": 843.33,
|
| 232422 |
+
"learning_rate": 8.340721153846154e-06,
|
| 232423 |
+
"loss": 0.3174,
|
| 232424 |
+
"step": 104580
|
| 232425 |
+
},
|
| 232426 |
+
{
|
| 232427 |
+
"epoch": 843.37,
|
| 232428 |
+
"learning_rate": 8.340641025641027e-06,
|
| 232429 |
+
"loss": 0.5898,
|
| 232430 |
+
"step": 104585
|
| 232431 |
+
},
|
| 232432 |
+
{
|
| 232433 |
+
"epoch": 843.41,
|
| 232434 |
+
"learning_rate": 8.340560897435897e-06,
|
| 232435 |
+
"loss": 1.1578,
|
| 232436 |
+
"step": 104590
|
| 232437 |
+
},
|
| 232438 |
+
{
|
| 232439 |
+
"epoch": 843.45,
|
| 232440 |
+
"learning_rate": 8.34048076923077e-06,
|
| 232441 |
+
"loss": 0.311,
|
| 232442 |
+
"step": 104595
|
| 232443 |
+
},
|
| 232444 |
+
{
|
| 232445 |
+
"epoch": 843.49,
|
| 232446 |
+
"learning_rate": 8.340400641025643e-06,
|
| 232447 |
+
"loss": 0.2912,
|
| 232448 |
+
"step": 104600
|
| 232449 |
+
},
|
| 232450 |
+
{
|
| 232451 |
+
"epoch": 843.53,
|
| 232452 |
+
"learning_rate": 8.340320512820513e-06,
|
| 232453 |
+
"loss": 0.3265,
|
| 232454 |
+
"step": 104605
|
| 232455 |
+
},
|
| 232456 |
+
{
|
| 232457 |
+
"epoch": 843.57,
|
| 232458 |
+
"learning_rate": 8.340240384615386e-06,
|
| 232459 |
+
"loss": 0.4652,
|
| 232460 |
+
"step": 104610
|
| 232461 |
+
},
|
| 232462 |
+
{
|
| 232463 |
+
"epoch": 843.61,
|
| 232464 |
+
"learning_rate": 8.340160256410257e-06,
|
| 232465 |
+
"loss": 1.1352,
|
| 232466 |
+
"step": 104615
|
| 232467 |
+
},
|
| 232468 |
+
{
|
| 232469 |
+
"epoch": 843.65,
|
| 232470 |
+
"learning_rate": 8.340080128205128e-06,
|
| 232471 |
+
"loss": 0.2664,
|
| 232472 |
+
"step": 104620
|
| 232473 |
+
},
|
| 232474 |
+
{
|
| 232475 |
+
"epoch": 843.69,
|
| 232476 |
+
"learning_rate": 8.34e-06,
|
| 232477 |
+
"loss": 0.2771,
|
| 232478 |
+
"step": 104625
|
| 232479 |
+
},
|
| 232480 |
+
{
|
| 232481 |
+
"epoch": 843.73,
|
| 232482 |
+
"learning_rate": 8.339919871794873e-06,
|
| 232483 |
+
"loss": 0.3284,
|
| 232484 |
+
"step": 104630
|
| 232485 |
+
},
|
| 232486 |
+
{
|
| 232487 |
+
"epoch": 843.77,
|
| 232488 |
+
"learning_rate": 8.339839743589744e-06,
|
| 232489 |
+
"loss": 0.5917,
|
| 232490 |
+
"step": 104635
|
| 232491 |
+
},
|
| 232492 |
+
{
|
| 232493 |
+
"epoch": 843.81,
|
| 232494 |
+
"learning_rate": 8.339759615384616e-06,
|
| 232495 |
+
"loss": 1.0977,
|
| 232496 |
+
"step": 104640
|
| 232497 |
+
},
|
| 232498 |
+
{
|
| 232499 |
+
"epoch": 843.85,
|
| 232500 |
+
"learning_rate": 8.339679487179489e-06,
|
| 232501 |
+
"loss": 0.3699,
|
| 232502 |
+
"step": 104645
|
| 232503 |
+
},
|
| 232504 |
+
{
|
| 232505 |
+
"epoch": 843.89,
|
| 232506 |
+
"learning_rate": 8.33959935897436e-06,
|
| 232507 |
+
"loss": 0.3303,
|
| 232508 |
+
"step": 104650
|
| 232509 |
+
},
|
| 232510 |
+
{
|
| 232511 |
+
"epoch": 843.93,
|
| 232512 |
+
"learning_rate": 8.339519230769231e-06,
|
| 232513 |
+
"loss": 0.3195,
|
| 232514 |
+
"step": 104655
|
| 232515 |
+
},
|
| 232516 |
+
{
|
| 232517 |
+
"epoch": 843.97,
|
| 232518 |
+
"learning_rate": 8.339439102564103e-06,
|
| 232519 |
+
"loss": 0.6459,
|
| 232520 |
+
"step": 104660
|
| 232521 |
+
},
|
| 232522 |
+
{
|
| 232523 |
+
"epoch": 844.0,
|
| 232524 |
+
"eval_loss": 0.41473865509033203,
|
| 232525 |
+
"eval_runtime": 40.4557,
|
| 232526 |
+
"eval_samples_per_second": 20.714,
|
| 232527 |
+
"eval_steps_per_second": 0.667,
|
| 232528 |
+
"eval_wer": 0.18636494667051023,
|
| 232529 |
+
"step": 104663
|
| 232530 |
+
},
|
| 232531 |
+
{
|
| 232532 |
+
"epoch": 844.02,
|
| 232533 |
+
"learning_rate": 8.339358974358976e-06,
|
| 232534 |
+
"loss": 0.3795,
|
| 232535 |
+
"step": 104665
|
| 232536 |
+
},
|
| 232537 |
+
{
|
| 232538 |
+
"epoch": 844.06,
|
| 232539 |
+
"learning_rate": 8.339278846153847e-06,
|
| 232540 |
+
"loss": 0.2505,
|
| 232541 |
+
"step": 104670
|
| 232542 |
+
},
|
| 232543 |
+
{
|
| 232544 |
+
"epoch": 844.1,
|
| 232545 |
+
"learning_rate": 8.339198717948718e-06,
|
| 232546 |
+
"loss": 0.2766,
|
| 232547 |
+
"step": 104675
|
| 232548 |
+
},
|
| 232549 |
+
{
|
| 232550 |
+
"epoch": 844.14,
|
| 232551 |
+
"learning_rate": 8.33911858974359e-06,
|
| 232552 |
+
"loss": 0.2945,
|
| 232553 |
+
"step": 104680
|
| 232554 |
+
},
|
| 232555 |
+
{
|
| 232556 |
+
"epoch": 844.18,
|
| 232557 |
+
"learning_rate": 8.339038461538463e-06,
|
| 232558 |
+
"loss": 0.5911,
|
| 232559 |
+
"step": 104685
|
| 232560 |
+
},
|
| 232561 |
+
{
|
| 232562 |
+
"epoch": 844.22,
|
| 232563 |
+
"learning_rate": 8.338958333333334e-06,
|
| 232564 |
+
"loss": 1.1743,
|
| 232565 |
+
"step": 104690
|
| 232566 |
+
},
|
| 232567 |
+
{
|
| 232568 |
+
"epoch": 844.26,
|
| 232569 |
+
"learning_rate": 8.338878205128206e-06,
|
| 232570 |
+
"loss": 0.289,
|
| 232571 |
+
"step": 104695
|
| 232572 |
+
},
|
| 232573 |
+
{
|
| 232574 |
+
"epoch": 844.3,
|
| 232575 |
+
"learning_rate": 8.338798076923079e-06,
|
| 232576 |
+
"loss": 0.2816,
|
| 232577 |
+
"step": 104700
|
| 232578 |
+
},
|
| 232579 |
+
{
|
| 232580 |
+
"epoch": 844.34,
|
| 232581 |
+
"learning_rate": 8.33871794871795e-06,
|
| 232582 |
+
"loss": 0.3096,
|
| 232583 |
+
"step": 104705
|
| 232584 |
+
},
|
| 232585 |
+
{
|
| 232586 |
+
"epoch": 844.38,
|
| 232587 |
+
"learning_rate": 8.338637820512821e-06,
|
| 232588 |
+
"loss": 0.6509,
|
| 232589 |
+
"step": 104710
|
| 232590 |
+
},
|
| 232591 |
+
{
|
| 232592 |
+
"epoch": 844.42,
|
| 232593 |
+
"learning_rate": 8.338557692307693e-06,
|
| 232594 |
+
"loss": 1.0765,
|
| 232595 |
+
"step": 104715
|
| 232596 |
+
},
|
| 232597 |
+
{
|
| 232598 |
+
"epoch": 844.46,
|
| 232599 |
+
"learning_rate": 8.338477564102566e-06,
|
| 232600 |
+
"loss": 0.3154,
|
| 232601 |
+
"step": 104720
|
| 232602 |
+
},
|
| 232603 |
+
{
|
| 232604 |
+
"epoch": 844.5,
|
| 232605 |
+
"learning_rate": 8.338397435897435e-06,
|
| 232606 |
+
"loss": 0.3312,
|
| 232607 |
+
"step": 104725
|
| 232608 |
+
},
|
| 232609 |
+
{
|
| 232610 |
+
"epoch": 844.54,
|
| 232611 |
+
"learning_rate": 8.338317307692308e-06,
|
| 232612 |
+
"loss": 0.3656,
|
| 232613 |
+
"step": 104730
|
| 232614 |
+
},
|
| 232615 |
+
{
|
| 232616 |
+
"epoch": 844.58,
|
| 232617 |
+
"learning_rate": 8.33823717948718e-06,
|
| 232618 |
+
"loss": 0.5444,
|
| 232619 |
+
"step": 104735
|
| 232620 |
+
},
|
| 232621 |
+
{
|
| 232622 |
+
"epoch": 844.62,
|
| 232623 |
+
"learning_rate": 8.338157051282051e-06,
|
| 232624 |
+
"loss": 1.1109,
|
| 232625 |
+
"step": 104740
|
| 232626 |
+
},
|
| 232627 |
+
{
|
| 232628 |
+
"epoch": 844.66,
|
| 232629 |
+
"learning_rate": 8.338076923076924e-06,
|
| 232630 |
+
"loss": 0.3036,
|
| 232631 |
+
"step": 104745
|
| 232632 |
+
},
|
| 232633 |
+
{
|
| 232634 |
+
"epoch": 844.7,
|
| 232635 |
+
"learning_rate": 8.337996794871796e-06,
|
| 232636 |
+
"loss": 0.2363,
|
| 232637 |
+
"step": 104750
|
| 232638 |
+
},
|
| 232639 |
+
{
|
| 232640 |
+
"epoch": 844.74,
|
| 232641 |
+
"learning_rate": 8.337916666666667e-06,
|
| 232642 |
+
"loss": 0.3796,
|
| 232643 |
+
"step": 104755
|
| 232644 |
+
},
|
| 232645 |
+
{
|
| 232646 |
+
"epoch": 844.78,
|
| 232647 |
+
"learning_rate": 8.337836538461538e-06,
|
| 232648 |
+
"loss": 0.6762,
|
| 232649 |
+
"step": 104760
|
| 232650 |
+
},
|
| 232651 |
+
{
|
| 232652 |
+
"epoch": 844.82,
|
| 232653 |
+
"learning_rate": 8.337756410256411e-06,
|
| 232654 |
+
"loss": 0.9999,
|
| 232655 |
+
"step": 104765
|
| 232656 |
+
},
|
| 232657 |
+
{
|
| 232658 |
+
"epoch": 844.86,
|
| 232659 |
+
"learning_rate": 8.337676282051283e-06,
|
| 232660 |
+
"loss": 0.3234,
|
| 232661 |
+
"step": 104770
|
| 232662 |
+
},
|
| 232663 |
+
{
|
| 232664 |
+
"epoch": 844.9,
|
| 232665 |
+
"learning_rate": 8.337596153846154e-06,
|
| 232666 |
+
"loss": 0.2904,
|
| 232667 |
+
"step": 104775
|
| 232668 |
+
},
|
| 232669 |
+
{
|
| 232670 |
+
"epoch": 844.94,
|
| 232671 |
+
"learning_rate": 8.337516025641025e-06,
|
| 232672 |
+
"loss": 0.3475,
|
| 232673 |
+
"step": 104780
|
| 232674 |
+
},
|
| 232675 |
+
{
|
| 232676 |
+
"epoch": 844.98,
|
| 232677 |
+
"learning_rate": 8.337435897435898e-06,
|
| 232678 |
+
"loss": 0.7839,
|
| 232679 |
+
"step": 104785
|
| 232680 |
+
},
|
| 232681 |
+
{
|
| 232682 |
+
"epoch": 845.0,
|
| 232683 |
+
"eval_loss": 0.373065710067749,
|
| 232684 |
+
"eval_runtime": 38.6362,
|
| 232685 |
+
"eval_samples_per_second": 21.689,
|
| 232686 |
+
"eval_steps_per_second": 0.699,
|
| 232687 |
+
"eval_wer": 0.18153026704095587,
|
| 232688 |
+
"step": 104787
|
| 232689 |
+
},
|
| 232690 |
+
{
|
| 232691 |
+
"epoch": 838.02,
|
| 232692 |
+
"learning_rate": 8.33735576923077e-06,
|
| 232693 |
+
"loss": 0.3005,
|
| 232694 |
+
"step": 104790
|
| 232695 |
+
},
|
| 232696 |
+
{
|
| 232697 |
+
"epoch": 838.06,
|
| 232698 |
+
"learning_rate": 8.337275641025641e-06,
|
| 232699 |
+
"loss": 0.2741,
|
| 232700 |
+
"step": 104795
|
| 232701 |
+
},
|
| 232702 |
+
{
|
| 232703 |
+
"epoch": 838.1,
|
| 232704 |
+
"learning_rate": 8.337195512820514e-06,
|
| 232705 |
+
"loss": 0.3582,
|
| 232706 |
+
"step": 104800
|
| 232707 |
+
},
|
| 232708 |
+
{
|
| 232709 |
+
"epoch": 838.14,
|
| 232710 |
+
"learning_rate": 8.337115384615386e-06,
|
| 232711 |
+
"loss": 0.3578,
|
| 232712 |
+
"step": 104805
|
| 232713 |
+
},
|
| 232714 |
+
{
|
| 232715 |
+
"epoch": 838.18,
|
| 232716 |
+
"learning_rate": 8.337035256410257e-06,
|
| 232717 |
+
"loss": 0.6388,
|
| 232718 |
+
"step": 104810
|
| 232719 |
+
},
|
| 232720 |
+
{
|
| 232721 |
+
"epoch": 838.22,
|
| 232722 |
+
"learning_rate": 8.336955128205128e-06,
|
| 232723 |
+
"loss": 0.8697,
|
| 232724 |
+
"step": 104815
|
| 232725 |
+
},
|
| 232726 |
+
{
|
| 232727 |
+
"epoch": 838.26,
|
| 232728 |
+
"learning_rate": 8.336875000000001e-06,
|
| 232729 |
+
"loss": 0.3219,
|
| 232730 |
+
"step": 104820
|
| 232731 |
+
},
|
| 232732 |
+
{
|
| 232733 |
+
"epoch": 838.3,
|
| 232734 |
+
"learning_rate": 8.336794871794873e-06,
|
| 232735 |
+
"loss": 0.3158,
|
| 232736 |
+
"step": 104825
|
| 232737 |
+
},
|
| 232738 |
+
{
|
| 232739 |
+
"epoch": 838.34,
|
| 232740 |
+
"learning_rate": 8.336714743589744e-06,
|
| 232741 |
+
"loss": 0.3448,
|
| 232742 |
+
"step": 104830
|
| 232743 |
+
},
|
| 232744 |
+
{
|
| 232745 |
+
"epoch": 838.38,
|
| 232746 |
+
"learning_rate": 8.336634615384615e-06,
|
| 232747 |
+
"loss": 0.7639,
|
| 232748 |
+
"step": 104835
|
| 232749 |
+
},
|
| 232750 |
+
{
|
| 232751 |
+
"epoch": 838.42,
|
| 232752 |
+
"learning_rate": 8.336554487179489e-06,
|
| 232753 |
+
"loss": 0.9047,
|
| 232754 |
+
"step": 104840
|
| 232755 |
+
},
|
| 232756 |
+
{
|
| 232757 |
+
"epoch": 838.46,
|
| 232758 |
+
"learning_rate": 8.33647435897436e-06,
|
| 232759 |
+
"loss": 0.3554,
|
| 232760 |
+
"step": 104845
|
| 232761 |
+
},
|
| 232762 |
+
{
|
| 232763 |
+
"epoch": 838.5,
|
| 232764 |
+
"learning_rate": 8.336394230769231e-06,
|
| 232765 |
+
"loss": 0.3034,
|
| 232766 |
+
"step": 104850
|
| 232767 |
+
},
|
| 232768 |
+
{
|
| 232769 |
+
"epoch": 838.54,
|
| 232770 |
+
"learning_rate": 8.336314102564104e-06,
|
| 232771 |
+
"loss": 0.42,
|
| 232772 |
+
"step": 104855
|
| 232773 |
+
},
|
| 232774 |
+
{
|
| 232775 |
+
"epoch": 838.58,
|
| 232776 |
+
"learning_rate": 8.336233974358976e-06,
|
| 232777 |
+
"loss": 0.7837,
|
| 232778 |
+
"step": 104860
|
| 232779 |
+
},
|
| 232780 |
+
{
|
| 232781 |
+
"epoch": 838.62,
|
| 232782 |
+
"learning_rate": 8.336153846153847e-06,
|
| 232783 |
+
"loss": 0.8555,
|
| 232784 |
+
"step": 104865
|
| 232785 |
+
},
|
| 232786 |
+
{
|
| 232787 |
+
"epoch": 838.66,
|
| 232788 |
+
"learning_rate": 8.336073717948718e-06,
|
| 232789 |
+
"loss": 0.2786,
|
| 232790 |
+
"step": 104870
|
| 232791 |
+
},
|
| 232792 |
+
{
|
| 232793 |
+
"epoch": 838.7,
|
| 232794 |
+
"learning_rate": 8.335993589743591e-06,
|
| 232795 |
+
"loss": 0.3009,
|
| 232796 |
+
"step": 104875
|
| 232797 |
+
},
|
| 232798 |
+
{
|
| 232799 |
+
"epoch": 838.74,
|
| 232800 |
+
"learning_rate": 8.335913461538461e-06,
|
| 232801 |
+
"loss": 0.4297,
|
| 232802 |
+
"step": 104880
|
| 232803 |
+
},
|
| 232804 |
+
{
|
| 232805 |
+
"epoch": 838.78,
|
| 232806 |
+
"learning_rate": 8.335833333333334e-06,
|
| 232807 |
+
"loss": 0.8525,
|
| 232808 |
+
"step": 104885
|
| 232809 |
+
},
|
| 232810 |
+
{
|
| 232811 |
+
"epoch": 838.82,
|
| 232812 |
+
"learning_rate": 8.335753205128205e-06,
|
| 232813 |
+
"loss": 0.7846,
|
| 232814 |
+
"step": 104890
|
| 232815 |
+
},
|
| 232816 |
+
{
|
| 232817 |
+
"epoch": 838.86,
|
| 232818 |
+
"learning_rate": 8.335673076923077e-06,
|
| 232819 |
+
"loss": 0.2307,
|
| 232820 |
+
"step": 104895
|
| 232821 |
+
},
|
| 232822 |
+
{
|
| 232823 |
+
"epoch": 838.9,
|
| 232824 |
+
"learning_rate": 8.33559294871795e-06,
|
| 232825 |
+
"loss": 0.2949,
|
| 232826 |
+
"step": 104900
|
| 232827 |
+
},
|
| 232828 |
+
{
|
| 232829 |
+
"epoch": 838.94,
|
| 232830 |
+
"learning_rate": 8.335512820512821e-06,
|
| 232831 |
+
"loss": 0.3757,
|
| 232832 |
+
"step": 104905
|
| 232833 |
+
},
|
| 232834 |
+
{
|
| 232835 |
+
"epoch": 838.98,
|
| 232836 |
+
"learning_rate": 8.335432692307693e-06,
|
| 232837 |
+
"loss": 0.794,
|
| 232838 |
+
"step": 104910
|
| 232839 |
+
},
|
| 232840 |
+
{
|
| 232841 |
+
"epoch": 839.0,
|
| 232842 |
+
"eval_loss": 0.39527204632759094,
|
| 232843 |
+
"eval_runtime": 38.317,
|
| 232844 |
+
"eval_samples_per_second": 21.87,
|
| 232845 |
+
"eval_steps_per_second": 0.705,
|
| 232846 |
+
"eval_wer": 0.19303890026323486,
|
| 232847 |
+
"step": 104912
|
| 232848 |
+
},
|
| 232849 |
+
{
|
| 232850 |
+
"epoch": 846.02,
|
| 232851 |
+
"learning_rate": 8.335352564102564e-06,
|
| 232852 |
+
"loss": 0.335,
|
| 232853 |
+
"step": 104915
|
| 232854 |
+
},
|
| 232855 |
+
{
|
| 232856 |
+
"epoch": 846.06,
|
| 232857 |
+
"learning_rate": 8.335272435897437e-06,
|
| 232858 |
+
"loss": 0.2563,
|
| 232859 |
+
"step": 104920
|
| 232860 |
+
},
|
| 232861 |
+
{
|
| 232862 |
+
"epoch": 846.1,
|
| 232863 |
+
"learning_rate": 8.335192307692308e-06,
|
| 232864 |
+
"loss": 0.2915,
|
| 232865 |
+
"step": 104925
|
| 232866 |
+
},
|
| 232867 |
+
{
|
| 232868 |
+
"epoch": 846.15,
|
| 232869 |
+
"learning_rate": 8.33511217948718e-06,
|
| 232870 |
+
"loss": 0.4043,
|
| 232871 |
+
"step": 104930
|
| 232872 |
+
},
|
| 232873 |
+
{
|
| 232874 |
+
"epoch": 846.19,
|
| 232875 |
+
"learning_rate": 8.335032051282051e-06,
|
| 232876 |
+
"loss": 0.7446,
|
| 232877 |
+
"step": 104935
|
| 232878 |
+
},
|
| 232879 |
+
{
|
| 232880 |
+
"epoch": 846.23,
|
| 232881 |
+
"learning_rate": 8.334951923076924e-06,
|
| 232882 |
+
"loss": 0.8774,
|
| 232883 |
+
"step": 104940
|
| 232884 |
+
},
|
| 232885 |
+
{
|
| 232886 |
+
"epoch": 846.27,
|
| 232887 |
+
"learning_rate": 8.334871794871796e-06,
|
| 232888 |
+
"loss": 0.2755,
|
| 232889 |
+
"step": 104945
|
| 232890 |
+
},
|
| 232891 |
+
{
|
| 232892 |
+
"epoch": 846.31,
|
| 232893 |
+
"learning_rate": 8.334791666666667e-06,
|
| 232894 |
+
"loss": 0.3497,
|
| 232895 |
+
"step": 104950
|
| 232896 |
+
},
|
| 232897 |
+
{
|
| 232898 |
+
"epoch": 846.35,
|
| 232899 |
+
"learning_rate": 8.33471153846154e-06,
|
| 232900 |
+
"loss": 0.44,
|
| 232901 |
+
"step": 104955
|
| 232902 |
+
},
|
| 232903 |
+
{
|
| 232904 |
+
"epoch": 846.39,
|
| 232905 |
+
"learning_rate": 8.334631410256411e-06,
|
| 232906 |
+
"loss": 0.9169,
|
| 232907 |
+
"step": 104960
|
| 232908 |
+
},
|
| 232909 |
+
{
|
| 232910 |
+
"epoch": 846.43,
|
| 232911 |
+
"learning_rate": 8.334551282051283e-06,
|
| 232912 |
+
"loss": 0.9068,
|
| 232913 |
+
"step": 104965
|
| 232914 |
+
},
|
| 232915 |
+
{
|
| 232916 |
+
"epoch": 846.47,
|
| 232917 |
+
"learning_rate": 8.334471153846154e-06,
|
| 232918 |
+
"loss": 0.3642,
|
| 232919 |
+
"step": 104970
|
| 232920 |
+
},
|
| 232921 |
+
{
|
| 232922 |
+
"epoch": 846.51,
|
| 232923 |
+
"learning_rate": 8.334391025641027e-06,
|
| 232924 |
+
"loss": 0.2927,
|
| 232925 |
+
"step": 104975
|
| 232926 |
+
},
|
| 232927 |
+
{
|
| 232928 |
+
"epoch": 846.55,
|
| 232929 |
+
"learning_rate": 8.334310897435898e-06,
|
| 232930 |
+
"loss": 0.3517,
|
| 232931 |
+
"step": 104980
|
| 232932 |
+
},
|
| 232933 |
+
{
|
| 232934 |
+
"epoch": 846.59,
|
| 232935 |
+
"learning_rate": 8.33423076923077e-06,
|
| 232936 |
+
"loss": 0.8367,
|
| 232937 |
+
"step": 104985
|
| 232938 |
+
},
|
| 232939 |
+
{
|
| 232940 |
+
"epoch": 846.63,
|
| 232941 |
+
"learning_rate": 8.334150641025641e-06,
|
| 232942 |
+
"loss": 0.7659,
|
| 232943 |
+
"step": 104990
|
| 232944 |
+
},
|
| 232945 |
+
{
|
| 232946 |
+
"epoch": 846.67,
|
| 232947 |
+
"learning_rate": 8.334070512820514e-06,
|
| 232948 |
+
"loss": 0.3322,
|
| 232949 |
+
"step": 104995
|
| 232950 |
+
},
|
| 232951 |
+
{
|
| 232952 |
+
"epoch": 846.71,
|
| 232953 |
+
"learning_rate": 8.333990384615386e-06,
|
| 232954 |
+
"loss": 0.3179,
|
| 232955 |
+
"step": 105000
|
| 232956 |
+
},
|
| 232957 |
+
{
|
| 232958 |
+
"epoch": 846.75,
|
| 232959 |
+
"learning_rate": 8.333910256410257e-06,
|
| 232960 |
+
"loss": 0.3345,
|
| 232961 |
+
"step": 105005
|
| 232962 |
+
},
|
| 232963 |
+
{
|
| 232964 |
+
"epoch": 846.79,
|
| 232965 |
+
"learning_rate": 8.33383012820513e-06,
|
| 232966 |
+
"loss": 0.7515,
|
| 232967 |
+
"step": 105010
|
| 232968 |
+
},
|
| 232969 |
+
{
|
| 232970 |
+
"epoch": 846.83,
|
| 232971 |
+
"learning_rate": 8.33375e-06,
|
| 232972 |
+
"loss": 0.9483,
|
| 232973 |
+
"step": 105015
|
| 232974 |
+
},
|
| 232975 |
+
{
|
| 232976 |
+
"epoch": 846.87,
|
| 232977 |
+
"learning_rate": 8.333669871794873e-06,
|
| 232978 |
+
"loss": 0.3109,
|
| 232979 |
+
"step": 105020
|
| 232980 |
+
},
|
| 232981 |
+
{
|
| 232982 |
+
"epoch": 846.91,
|
| 232983 |
+
"learning_rate": 8.333589743589744e-06,
|
| 232984 |
+
"loss": 0.2624,
|
| 232985 |
+
"step": 105025
|
| 232986 |
+
},
|
| 232987 |
+
{
|
| 232988 |
+
"epoch": 846.95,
|
| 232989 |
+
"learning_rate": 8.333509615384615e-06,
|
| 232990 |
+
"loss": 0.421,
|
| 232991 |
+
"step": 105030
|
| 232992 |
+
},
|
| 232993 |
+
{
|
| 232994 |
+
"epoch": 846.99,
|
| 232995 |
+
"learning_rate": 8.333429487179487e-06,
|
| 232996 |
+
"loss": 0.8294,
|
| 232997 |
+
"step": 105035
|
| 232998 |
+
},
|
| 232999 |
+
{
|
| 233000 |
+
"epoch": 847.0,
|
| 233001 |
+
"eval_loss": 0.3643065392971039,
|
| 233002 |
+
"eval_runtime": 40.2346,
|
| 233003 |
+
"eval_samples_per_second": 20.828,
|
| 233004 |
+
"eval_steps_per_second": 0.671,
|
| 233005 |
+
"eval_wer": 0.18286835222319092,
|
| 233006 |
+
"step": 105036
|
| 233007 |
+
},
|
| 233008 |
+
{
|
| 233009 |
+
"epoch": 847.03,
|
| 233010 |
+
"learning_rate": 8.33334935897436e-06,
|
| 233011 |
+
"loss": 0.2923,
|
| 233012 |
+
"step": 105040
|
| 233013 |
+
},
|
| 233014 |
+
{
|
| 233015 |
+
"epoch": 847.07,
|
| 233016 |
+
"learning_rate": 8.333269230769231e-06,
|
| 233017 |
+
"loss": 0.2918,
|
| 233018 |
+
"step": 105045
|
| 233019 |
+
},
|
| 233020 |
+
{
|
| 233021 |
+
"epoch": 847.11,
|
| 233022 |
+
"learning_rate": 8.333189102564103e-06,
|
| 233023 |
+
"loss": 0.3143,
|
| 233024 |
+
"step": 105050
|
| 233025 |
+
},
|
| 233026 |
+
{
|
| 233027 |
+
"epoch": 847.15,
|
| 233028 |
+
"learning_rate": 8.333108974358976e-06,
|
| 233029 |
+
"loss": 0.4531,
|
| 233030 |
+
"step": 105055
|
| 233031 |
+
},
|
| 233032 |
+
{
|
| 233033 |
+
"epoch": 847.19,
|
| 233034 |
+
"learning_rate": 8.333028846153847e-06,
|
| 233035 |
+
"loss": 0.9003,
|
| 233036 |
+
"step": 105060
|
| 233037 |
+
},
|
| 233038 |
+
{
|
| 233039 |
+
"epoch": 847.23,
|
| 233040 |
+
"learning_rate": 8.332948717948718e-06,
|
| 233041 |
+
"loss": 0.714,
|
| 233042 |
+
"step": 105065
|
| 233043 |
+
},
|
| 233044 |
+
{
|
| 233045 |
+
"epoch": 847.27,
|
| 233046 |
+
"learning_rate": 8.33286858974359e-06,
|
| 233047 |
+
"loss": 0.3148,
|
| 233048 |
+
"step": 105070
|
| 233049 |
+
},
|
| 233050 |
+
{
|
| 233051 |
+
"epoch": 847.31,
|
| 233052 |
+
"learning_rate": 8.332788461538463e-06,
|
| 233053 |
+
"loss": 0.2865,
|
| 233054 |
+
"step": 105075
|
| 233055 |
+
},
|
| 233056 |
+
{
|
| 233057 |
+
"epoch": 847.35,
|
| 233058 |
+
"learning_rate": 8.332708333333334e-06,
|
| 233059 |
+
"loss": 0.3717,
|
| 233060 |
+
"step": 105080
|
| 233061 |
+
},
|
| 233062 |
+
{
|
| 233063 |
+
"epoch": 847.39,
|
| 233064 |
+
"learning_rate": 8.332628205128205e-06,
|
| 233065 |
+
"loss": 0.8555,
|
| 233066 |
+
"step": 105085
|
| 233067 |
+
},
|
| 233068 |
+
{
|
| 233069 |
+
"epoch": 847.43,
|
| 233070 |
+
"learning_rate": 8.332548076923077e-06,
|
| 233071 |
+
"loss": 0.566,
|
| 233072 |
+
"step": 105090
|
| 233073 |
+
},
|
| 233074 |
+
{
|
| 233075 |
+
"epoch": 847.47,
|
| 233076 |
+
"learning_rate": 8.33246794871795e-06,
|
| 233077 |
+
"loss": 0.3594,
|
| 233078 |
+
"step": 105095
|
| 233079 |
+
},
|
| 233080 |
+
{
|
| 233081 |
+
"epoch": 847.51,
|
| 233082 |
+
"learning_rate": 8.332387820512821e-06,
|
| 233083 |
+
"loss": 0.2978,
|
| 233084 |
+
"step": 105100
|
| 233085 |
+
},
|
| 233086 |
+
{
|
| 233087 |
+
"epoch": 847.55,
|
| 233088 |
+
"learning_rate": 8.332307692307693e-06,
|
| 233089 |
+
"loss": 0.4455,
|
| 233090 |
+
"step": 105105
|
| 233091 |
+
},
|
| 233092 |
+
{
|
| 233093 |
+
"epoch": 847.59,
|
| 233094 |
+
"learning_rate": 8.332227564102566e-06,
|
| 233095 |
+
"loss": 0.8444,
|
| 233096 |
+
"step": 105110
|
| 233097 |
+
},
|
| 233098 |
+
{
|
| 233099 |
+
"epoch": 847.63,
|
| 233100 |
+
"learning_rate": 8.332147435897437e-06,
|
| 233101 |
+
"loss": 0.8164,
|
| 233102 |
+
"step": 105115
|
| 233103 |
+
},
|
| 233104 |
+
{
|
| 233105 |
+
"epoch": 847.67,
|
| 233106 |
+
"learning_rate": 8.332067307692308e-06,
|
| 233107 |
+
"loss": 0.2587,
|
| 233108 |
+
"step": 105120
|
| 233109 |
+
},
|
| 233110 |
+
{
|
| 233111 |
+
"epoch": 847.71,
|
| 233112 |
+
"learning_rate": 8.33198717948718e-06,
|
| 233113 |
+
"loss": 0.4058,
|
| 233114 |
+
"step": 105125
|
| 233115 |
+
},
|
| 233116 |
+
{
|
| 233117 |
+
"epoch": 847.76,
|
| 233118 |
+
"learning_rate": 8.331907051282053e-06,
|
| 233119 |
+
"loss": 0.3621,
|
| 233120 |
+
"step": 105130
|
| 233121 |
+
},
|
| 233122 |
+
{
|
| 233123 |
+
"epoch": 847.8,
|
| 233124 |
+
"learning_rate": 8.331826923076924e-06,
|
| 233125 |
+
"loss": 0.8602,
|
| 233126 |
+
"step": 105135
|
| 233127 |
+
},
|
| 233128 |
+
{
|
| 233129 |
+
"epoch": 847.84,
|
| 233130 |
+
"learning_rate": 8.331746794871795e-06,
|
| 233131 |
+
"loss": 0.6104,
|
| 233132 |
+
"step": 105140
|
| 233133 |
+
},
|
| 233134 |
+
{
|
| 233135 |
+
"epoch": 847.88,
|
| 233136 |
+
"learning_rate": 8.331666666666668e-06,
|
| 233137 |
+
"loss": 0.3162,
|
| 233138 |
+
"step": 105145
|
| 233139 |
+
},
|
| 233140 |
+
{
|
| 233141 |
+
"epoch": 847.92,
|
| 233142 |
+
"learning_rate": 8.33158653846154e-06,
|
| 233143 |
+
"loss": 0.322,
|
| 233144 |
+
"step": 105150
|
| 233145 |
+
},
|
| 233146 |
+
{
|
| 233147 |
+
"epoch": 847.96,
|
| 233148 |
+
"learning_rate": 8.331506410256411e-06,
|
| 233149 |
+
"loss": 0.3725,
|
| 233150 |
+
"step": 105155
|
| 233151 |
+
},
|
| 233152 |
+
{
|
| 233153 |
+
"epoch": 848.0,
|
| 233154 |
+
"learning_rate": 8.331426282051283e-06,
|
| 233155 |
+
"loss": 1.2174,
|
| 233156 |
+
"step": 105160
|
| 233157 |
+
},
|
| 233158 |
+
{
|
| 233159 |
+
"epoch": 848.0,
|
| 233160 |
+
"eval_loss": 0.36234787106513977,
|
| 233161 |
+
"eval_runtime": 40.5867,
|
| 233162 |
+
"eval_samples_per_second": 20.647,
|
| 233163 |
+
"eval_steps_per_second": 0.665,
|
| 233164 |
+
"eval_wer": 0.18248653370214005,
|
| 233165 |
+
"step": 105160
|
| 233166 |
}
|
| 233167 |
],
|
| 233168 |
"max_steps": 620000,
|
| 233169 |
"num_train_epochs": 5000,
|
| 233170 |
+
"total_flos": 2.9594849852297996e+20,
|
| 233171 |
"trial_name": null,
|
| 233172 |
"trial_params": null
|
| 233173 |
}
|
model-bin/finetune/base/{checkpoint-104539 β checkpoint-105160}/training_args.bin
RENAMED
|
File without changes
|
model-bin/finetune/base/log/1629998008.864934/events.out.tfevents.1629998008.8e89bd551565.924.261
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6985ac6c523ebb361c201f0491f8b11aa8c9d72903006aaee9fd47477f5606d
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1629998442.9438877/events.out.tfevents.1629998442.8e89bd551565.924.263
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5181ba135022c5ab634da700e0f1b262d9308785fe2e359733ba74a0bf35972f
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1629998872.4937503/events.out.tfevents.1629998872.8e89bd551565.924.265
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06818313c2ea6c077cc639087526e7d6e544e4a71ccbe1fc421b855b2cbd6a2a
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1629999311.0902207/events.out.tfevents.1629999311.8e89bd551565.924.267
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c2b42b4fceef552a2370566d308ed6f0a080a3da0311a6e5f76f35f730dcd29
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/1629999744.3670874/events.out.tfevents.1629999744.8e89bd551565.924.269
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9d83c10623b43b6ef98e4072255e54a7862729b0134d4fe8b4aec60189924f3
|
| 3 |
+
size 4194
|
model-bin/finetune/base/log/events.out.tfevents.1629998008.8e89bd551565.924.260
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b40b56ca2e9f454b2a121871b848ca19688682f50e87dded377c7ee9f6941de3
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1629998442.8e89bd551565.924.262
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bf4d54257f184712fd7465ed1aabd91aff76f91693fed51ccd46910126f4161
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1629998871.8e89bd551565.924.264
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e2ac36398fe7c1959fcb4aa61fd563d96938ac530459bb83b8f9941f145adc4
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1629999311.8e89bd551565.924.266
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6a32dafdd64fbb356534d7cb19106651f4c27f6ea1f27f6dcfcb6c1a4f9e8ab
|
| 3 |
+
size 8622
|
model-bin/finetune/base/log/events.out.tfevents.1629999744.8e89bd551565.924.268
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1575e77688a45ed03d647a849aa97948e839b215a166541f4478d53cacc3833e
|
| 3 |
+
size 8622
|