auto commit
- main.py +1 -1
- model-bin/finetune/base/checkpoint-8212/pytorch_model.bin +0 -3
- model-bin/finetune/base/checkpoint-8212/rng_state.pth +0 -3
- model-bin/finetune/base/checkpoint-8212/scaler.pt +0 -3
- model-bin/finetune/base/checkpoint-8212/trainer_state.json +0 -0
- model-bin/finetune/base/checkpoint-8336/config.json +0 -78
- model-bin/finetune/base/checkpoint-8336/optimizer.pt +0 -3
- model-bin/finetune/base/checkpoint-8336/preprocessor_config.json +0 -9
- model-bin/finetune/base/checkpoint-8336/scheduler.pt +0 -3
- model-bin/finetune/base/checkpoint-8336/trainer_state.json +0 -0
- model-bin/finetune/base/checkpoint-8336/training_args.bin +0 -3
- model-bin/finetune/base/checkpoint-8709/config.json +0 -78
- model-bin/finetune/base/checkpoint-8709/optimizer.pt +0 -3
- model-bin/finetune/base/checkpoint-8709/preprocessor_config.json +0 -9
- model-bin/finetune/base/checkpoint-8709/pytorch_model.bin +0 -3
- model-bin/finetune/base/checkpoint-8709/scheduler.pt +0 -3
- model-bin/finetune/base/checkpoint-8709/training_args.bin +0 -3
- model-bin/finetune/base/{checkpoint-8212 → checkpoint-8958}/config.json +0 -0
- model-bin/finetune/base/{checkpoint-8212 → checkpoint-8958}/optimizer.pt +1 -1
- model-bin/finetune/base/{checkpoint-8212 → checkpoint-8958}/preprocessor_config.json +0 -0
- model-bin/finetune/base/{checkpoint-8336 → checkpoint-8958}/pytorch_model.bin +1 -1
- model-bin/finetune/base/{checkpoint-8709 → checkpoint-8958}/rng_state.pth +2 -2
- model-bin/finetune/base/{checkpoint-8709 → checkpoint-8958}/scaler.pt +1 -1
- model-bin/finetune/base/{checkpoint-8212 → checkpoint-8958}/scheduler.pt +1 -1
- model-bin/finetune/base/{checkpoint-8709 → checkpoint-8958}/trainer_state.json +1517 -5
- model-bin/finetune/base/{checkpoint-8212 → checkpoint-8958}/training_args.bin +0 -0
- model-bin/finetune/base/{checkpoint-8336/scaler.pt → log/1629481571.7415848/events.out.tfevents.1629481571.2977154bd390.32087.9} +2 -2
- model-bin/finetune/base/{checkpoint-8336/rng_state.pth → log/events.out.tfevents.1629481571.2977154bd390.32087.8} +2 -2
main.py
CHANGED
@@ -89,7 +89,7 @@ def load_prepared_dataset(path, processor, cache_file_filter_name, cache_file_ma
 def commit_checkpoint():
     submit_commands = [
         'git add model-bin/finetune/base/*',
-        'git commit -m "auto
+        'git commit -m "auto-commit"',
         'git push origin main'
     ]
     for command in submit_commands:
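For context, a minimal sketch of how a commit_checkpoint helper like the one above could run those shell commands with Python's subprocess module. Only the lines shown in the diff are known from main.py; the loop body and error handling below are assumptions for illustration.

# Sketch only: same commands as the diff above; error handling is illustrative.
import subprocess

def commit_checkpoint():
    submit_commands = [
        'git add model-bin/finetune/base/*',
        'git commit -m "auto-commit"',
        'git push origin main',
    ]
    for command in submit_commands:
        # shell=True so the glob in "git add" is expanded by the shell
        result = subprocess.run(command, shell=True)
        if result.returncode != 0:
            print(f"command failed: {command}")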
model-bin/finetune/base/checkpoint-8212/pytorch_model.bin
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c9ceae04a6996246ad6724a00462738dd9f08e397a51090cccaabc78fd64342b
-size 377909911
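The three deleted lines above are a Git LFS pointer file: the actual ~378 MB weights live in LFS storage, and only this small pointer is versioned in the repository. As a rough illustration, such a pointer can be split into its fields as follows; the helper name and path are hypothetical.

# Hypothetical helper: parse the key/value lines of a Git LFS pointer file.
def parse_lfs_pointer(path):
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields  # e.g. {"version": "...", "oid": "sha256:...", "size": "377909911"}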
model-bin/finetune/base/checkpoint-8212/rng_state.pth
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:369a5523390db45490fad840928af02d3bc252ff7791aae167c93deca4d91b7f
-size 14503
model-bin/finetune/base/checkpoint-8212/scaler.pt
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ce3caf9fa11cbc306f2acff0c04546dc0430f5ec115580be46b6feffffb1db78
-size 559
model-bin/finetune/base/checkpoint-8212/trainer_state.json
DELETED
The diff for this file is too large to render. See raw diff.
model-bin/finetune/base/checkpoint-8336/config.json
DELETED
@@ -1,78 +0,0 @@
-{
-  "_name_or_path": "./model-bin/pretrained/base",
-  "activation_dropout": 0.1,
-  "apply_spec_augment": true,
-  "architectures": [
-    "Wav2Vec2ForCTC"
-  ],
-  "attention_dropout": 0.1,
-  "bos_token_id": 1,
-  "codevector_dim": 256,
-  "contrastive_logits_temperature": 0.1,
-  "conv_bias": false,
-  "conv_dim": [
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512
-  ],
-  "conv_kernel": [
-    10,
-    3,
-    3,
-    3,
-    3,
-    2,
-    2
-  ],
-  "conv_stride": [
-    5,
-    2,
-    2,
-    2,
-    2,
-    2,
-    2
-  ],
-  "ctc_loss_reduction": "mean",
-  "ctc_zero_infinity": false,
-  "diversity_loss_weight": 0.1,
-  "do_stable_layer_norm": false,
-  "eos_token_id": 2,
-  "feat_extract_activation": "gelu",
-  "feat_extract_dropout": 0.0,
-  "feat_extract_norm": "group",
-  "feat_proj_dropout": 0.1,
-  "feat_quantizer_dropout": 0.0,
-  "final_dropout": 0.1,
-  "gradient_checkpointing": true,
-  "hidden_act": "gelu",
-  "hidden_dropout": 0.1,
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "layer_norm_eps": 1e-05,
-  "layerdrop": 0.1,
-  "mask_feature_length": 10,
-  "mask_feature_prob": 0.0,
-  "mask_time_length": 10,
-  "mask_time_prob": 0.05,
-  "model_type": "wav2vec2",
-  "num_attention_heads": 12,
-  "num_codevector_groups": 2,
-  "num_codevectors_per_group": 320,
-  "num_conv_pos_embedding_groups": 16,
-  "num_conv_pos_embeddings": 128,
-  "num_feat_extract_layers": 7,
-  "num_hidden_layers": 12,
-  "num_negatives": 100,
-  "pad_token_id": 109,
-  "proj_codevector_dim": 256,
-  "torch_dtype": "float32",
-  "transformers_version": "4.9.2",
-  "vocab_size": 110
-}
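This is a standard wav2vec2 CTC config (Wav2Vec2ForCTC, transformers 4.9.2, vocab_size 110). A checkpoint directory containing such a config.json alongside pytorch_model.bin can normally be reloaded as sketched below; the path points at the checkpoint-8958 directory this commit creates and is only illustrative, assuming the weights are available locally.

# Sketch: reload a saved Wav2Vec2ForCTC checkpoint directory (path is illustrative).
from transformers import Wav2Vec2ForCTC

model = Wav2Vec2ForCTC.from_pretrained("./model-bin/finetune/base/checkpoint-8958")
print(model.config.vocab_size)  # 110, per the config shown above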
model-bin/finetune/base/checkpoint-8336/optimizer.pt
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:66d052e5cd2897e521431ffac494367dd2fc0682d7655dcef9e944a91f3f9462
-size 722165009
model-bin/finetune/base/checkpoint-8336/preprocessor_config.json
DELETED
@@ -1,9 +0,0 @@
-{
-  "do_normalize": true,
-  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
-  "feature_size": 1,
-  "padding_side": "right",
-  "padding_value": 0.0,
-  "return_attention_mask": false,
-  "sampling_rate": 16000
-}
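These preprocessor settings describe a Wav2Vec2FeatureExtractor expecting 16 kHz, single-channel, normalized input. A rough equivalent constructed directly in code, rather than loaded from the deleted file, would look like this:

# Sketch: build the feature extractor described by the deleted preprocessor_config.json.
from transformers import Wav2Vec2FeatureExtractor

feature_extractor = Wav2Vec2FeatureExtractor(
    feature_size=1,
    sampling_rate=16000,
    padding_value=0.0,
    do_normalize=True,
    return_attention_mask=False,
)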
model-bin/finetune/base/checkpoint-8336/scheduler.pt
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:52cdb53167833ea5ec421b4bc4792a7a88ca2114418372fb1cd508a5ebcca6b1
-size 623
model-bin/finetune/base/checkpoint-8336/trainer_state.json
DELETED
The diff for this file is too large to render. See raw diff.
model-bin/finetune/base/checkpoint-8336/training_args.bin
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:53d5f6b64d43d7a5c22d92f1aa4114a0ed5474a4ee16170f4a83fcd9522f9c6e
-size 2671
model-bin/finetune/base/checkpoint-8709/config.json
DELETED
@@ -1,78 +0,0 @@
(The deleted file is identical, line for line, to the 78-line checkpoint-8336/config.json shown above.)
model-bin/finetune/base/checkpoint-8709/optimizer.pt
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:415b0246c463cd5ceb4041b8f28843eda1a8031b909e1738d3c2a92a64e7c7d6
-size 722165009
model-bin/finetune/base/checkpoint-8709/preprocessor_config.json
DELETED
@@ -1,9 +0,0 @@
-{
-  "do_normalize": true,
-  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
-  "feature_size": 1,
-  "padding_side": "right",
-  "padding_value": 0.0,
-  "return_attention_mask": false,
-  "sampling_rate": 16000
-}
model-bin/finetune/base/checkpoint-8709/pytorch_model.bin
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5c6dc6a2981c4ed21a7a862e57603e9e9f7c71d41e65b7c711ef7d511ef61fc8
-size 377909911
model-bin/finetune/base/checkpoint-8709/scheduler.pt
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:36c2d8351b8dc01af8da53704884d2363d46bcdd9e86d18e980d29bde05879ec
-size 623
model-bin/finetune/base/checkpoint-8709/training_args.bin
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:53d5f6b64d43d7a5c22d92f1aa4114a0ed5474a4ee16170f4a83fcd9522f9c6e
-size 2671
model-bin/finetune/base/{checkpoint-8212 → checkpoint-8958}/config.json
RENAMED
File without changes
model-bin/finetune/base/{checkpoint-8212 → checkpoint-8958}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d66880966a57a8c87bdda1b410b4f3e68c34d3b8a424197028887abc7e150c55
 size 722165009
model-bin/finetune/base/{checkpoint-8212 → checkpoint-8958}/preprocessor_config.json
RENAMED
File without changes
model-bin/finetune/base/{checkpoint-8336 → checkpoint-8958}/pytorch_model.bin
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3c254f8ad282308ea33bb83c758ef8293899f150adc41b570833d7b458c78bfc
 size 377909911
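Only the LFS pointer changes in these renames; the oid is the SHA-256 of the tracked file's contents, so the new hash can be checked against a local copy of the weights. The path below is illustrative.

# Sketch: verify a local file against the sha256 oid recorded in its LFS pointer.
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

print(sha256_of("model-bin/finetune/base/checkpoint-8958/pytorch_model.bin"))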
model-bin/finetune/base/{checkpoint-8709 → checkpoint-8958}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:a91a6dcfed691871e13767ddc06caddb7fcdfd1c0a09919cf6d6f6999f8cf961
+size 14567
model-bin/finetune/base/{checkpoint-8709 → checkpoint-8958}/scaler.pt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:13952a1cb28382e67abf3075b93cfb164b5d815d475d2f1573c4e3e812c1dba1
 size 559
model-bin/finetune/base/{checkpoint-8212 → checkpoint-8958}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cfaf4fa12293f359b8f566ee48e8288f4f3121c30617ecd31186b7e3d795a959
 size 623
model-bin/finetune/base/{checkpoint-8709 → checkpoint-8958}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@  (old side; the original values are truncated in the rendered diff)
 {
-  "best_metric": 0.
-  "best_model_checkpoint": "./model-bin/finetune/base/checkpoint-
-  "epoch":
-  "global_step":
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -52890,11 +52890,1523 @@  (old side)
   "eval_steps_per_second": 0.631,
   "eval_wer": 0.7469591821505858,
   "step": 8709
   }
 ],
 "max_steps": 620000,
 "num_train_epochs": 5000,
-"total_flos": 2.
 "trial_name": null,
 "trial_params": null
}

@@ -1,8 +1,8 @@  (new side)
 {
+  "best_metric": 0.6824567855829349,
+  "best_model_checkpoint": "./model-bin/finetune/base/checkpoint-8958",
+  "epoch": 72.0,
+  "global_step": 8958,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -52890,11 +52890,1523 @@  (new side)
   "eval_steps_per_second": 0.631,
   "eval_wer": 0.7469591821505858,
   "step": 8709
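The 1,517 added lines that follow (per the diff header) are standard Trainer log_history entries: per-step epoch, learning_rate, loss and step values, plus periodic evaluation blocks reporting eval_loss and eval_wer. As a small illustration of how such a trainer_state.json can be inspected after training, assuming a local copy of the checkpoint directory:

# Sketch: summarise a trainer_state.json (path is illustrative).
import json

with open("model-bin/finetune/base/checkpoint-8958/trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"], state["best_model_checkpoint"])
# Keep only the evaluation entries and show the most recent WER values.
eval_points = [e for e in state["log_history"] if "eval_wer" in e]
for e in eval_points[-3:]:
    print(e["step"], e["eval_wer"])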
| 52893 |
+
},
|
| 52894 |
+
{
|
| 52895 |
+
"epoch": 69.01,
|
| 52896 |
+
"learning_rate": 9.875525040387724e-06,
|
| 52897 |
+
"loss": 1.6153,
|
| 52898 |
+
"step": 8710
|
| 52899 |
+
},
|
| 52900 |
+
{
|
| 52901 |
+
"epoch": 69.02,
|
| 52902 |
+
"learning_rate": 9.87550888529887e-06,
|
| 52903 |
+
"loss": 1.4633,
|
| 52904 |
+
"step": 8711
|
| 52905 |
+
},
|
| 52906 |
+
{
|
| 52907 |
+
"epoch": 69.02,
|
| 52908 |
+
"learning_rate": 9.875492730210016e-06,
|
| 52909 |
+
"loss": 1.5134,
|
| 52910 |
+
"step": 8712
|
| 52911 |
+
},
|
| 52912 |
+
{
|
| 52913 |
+
"epoch": 69.03,
|
| 52914 |
+
"learning_rate": 9.875476575121163e-06,
|
| 52915 |
+
"loss": 1.5621,
|
| 52916 |
+
"step": 8713
|
| 52917 |
+
},
|
| 52918 |
+
{
|
| 52919 |
+
"epoch": 69.04,
|
| 52920 |
+
"learning_rate": 9.875460420032311e-06,
|
| 52921 |
+
"loss": 1.4003,
|
| 52922 |
+
"step": 8714
|
| 52923 |
+
},
|
| 52924 |
+
{
|
| 52925 |
+
"epoch": 69.05,
|
| 52926 |
+
"learning_rate": 9.875444264943458e-06,
|
| 52927 |
+
"loss": 1.54,
|
| 52928 |
+
"step": 8715
|
| 52929 |
+
},
|
| 52930 |
+
{
|
| 52931 |
+
"epoch": 69.06,
|
| 52932 |
+
"learning_rate": 9.875428109854605e-06,
|
| 52933 |
+
"loss": 1.3723,
|
| 52934 |
+
"step": 8716
|
| 52935 |
+
},
|
| 52936 |
+
{
|
| 52937 |
+
"epoch": 69.06,
|
| 52938 |
+
"learning_rate": 9.875411954765752e-06,
|
| 52939 |
+
"loss": 1.672,
|
| 52940 |
+
"step": 8717
|
| 52941 |
+
},
|
| 52942 |
+
{
|
| 52943 |
+
"epoch": 69.07,
|
| 52944 |
+
"learning_rate": 9.875395799676899e-06,
|
| 52945 |
+
"loss": 1.5243,
|
| 52946 |
+
"step": 8718
|
| 52947 |
+
},
|
| 52948 |
+
{
|
| 52949 |
+
"epoch": 69.08,
|
| 52950 |
+
"learning_rate": 9.875379644588046e-06,
|
| 52951 |
+
"loss": 1.593,
|
| 52952 |
+
"step": 8719
|
| 52953 |
+
},
|
| 52954 |
+
{
|
| 52955 |
+
"epoch": 69.09,
|
| 52956 |
+
"learning_rate": 9.875363489499194e-06,
|
| 52957 |
+
"loss": 1.3595,
|
| 52958 |
+
"step": 8720
|
| 52959 |
+
},
|
| 52960 |
+
{
|
| 52961 |
+
"epoch": 69.1,
|
| 52962 |
+
"learning_rate": 9.87534733441034e-06,
|
| 52963 |
+
"loss": 1.4388,
|
| 52964 |
+
"step": 8721
|
| 52965 |
+
},
|
| 52966 |
+
{
|
| 52967 |
+
"epoch": 69.1,
|
| 52968 |
+
"learning_rate": 9.875331179321486e-06,
|
| 52969 |
+
"loss": 1.3854,
|
| 52970 |
+
"step": 8722
|
| 52971 |
+
},
|
| 52972 |
+
{
|
| 52973 |
+
"epoch": 69.11,
|
| 52974 |
+
"learning_rate": 9.875315024232635e-06,
|
| 52975 |
+
"loss": 1.8071,
|
| 52976 |
+
"step": 8723
|
| 52977 |
+
},
|
| 52978 |
+
{
|
| 52979 |
+
"epoch": 69.12,
|
| 52980 |
+
"learning_rate": 9.875298869143781e-06,
|
| 52981 |
+
"loss": 1.5355,
|
| 52982 |
+
"step": 8724
|
| 52983 |
+
},
|
| 52984 |
+
{
|
| 52985 |
+
"epoch": 69.13,
|
| 52986 |
+
"learning_rate": 9.875282714054928e-06,
|
| 52987 |
+
"loss": 1.4477,
|
| 52988 |
+
"step": 8725
|
| 52989 |
+
},
|
| 52990 |
+
{
|
| 52991 |
+
"epoch": 69.14,
|
| 52992 |
+
"learning_rate": 9.875266558966075e-06,
|
| 52993 |
+
"loss": 1.5663,
|
| 52994 |
+
"step": 8726
|
| 52995 |
+
},
|
| 52996 |
+
{
|
| 52997 |
+
"epoch": 69.14,
|
| 52998 |
+
"learning_rate": 9.875250403877222e-06,
|
| 52999 |
+
"loss": 1.7814,
|
| 53000 |
+
"step": 8727
|
| 53001 |
+
},
|
| 53002 |
+
{
|
| 53003 |
+
"epoch": 69.15,
|
| 53004 |
+
"learning_rate": 9.875234248788369e-06,
|
| 53005 |
+
"loss": 1.4415,
|
| 53006 |
+
"step": 8728
|
| 53007 |
+
},
|
| 53008 |
+
{
|
| 53009 |
+
"epoch": 69.16,
|
| 53010 |
+
"learning_rate": 9.875218093699516e-06,
|
| 53011 |
+
"loss": 1.7214,
|
| 53012 |
+
"step": 8729
|
| 53013 |
+
},
|
| 53014 |
+
{
|
| 53015 |
+
"epoch": 69.17,
|
| 53016 |
+
"learning_rate": 9.875201938610664e-06,
|
| 53017 |
+
"loss": 1.8272,
|
| 53018 |
+
"step": 8730
|
| 53019 |
+
},
|
| 53020 |
+
{
|
| 53021 |
+
"epoch": 69.18,
|
| 53022 |
+
"learning_rate": 9.875185783521811e-06,
|
| 53023 |
+
"loss": 1.8333,
|
| 53024 |
+
"step": 8731
|
| 53025 |
+
},
|
| 53026 |
+
{
|
| 53027 |
+
"epoch": 69.18,
|
| 53028 |
+
"learning_rate": 9.875169628432956e-06,
|
| 53029 |
+
"loss": 2.2468,
|
| 53030 |
+
"step": 8732
|
| 53031 |
+
},
|
| 53032 |
+
{
|
| 53033 |
+
"epoch": 69.19,
|
| 53034 |
+
"learning_rate": 9.875153473344105e-06,
|
| 53035 |
+
"loss": 2.7862,
|
| 53036 |
+
"step": 8733
|
| 53037 |
+
},
|
| 53038 |
+
{
|
| 53039 |
+
"epoch": 69.2,
|
| 53040 |
+
"learning_rate": 9.875137318255251e-06,
|
| 53041 |
+
"loss": 3.3676,
|
| 53042 |
+
"step": 8734
|
| 53043 |
+
},
|
| 53044 |
+
{
|
| 53045 |
+
"epoch": 69.21,
|
| 53046 |
+
"learning_rate": 9.875121163166398e-06,
|
| 53047 |
+
"loss": 1.5527,
|
| 53048 |
+
"step": 8735
|
| 53049 |
+
},
|
| 53050 |
+
{
|
| 53051 |
+
"epoch": 69.22,
|
| 53052 |
+
"learning_rate": 9.875105008077545e-06,
|
| 53053 |
+
"loss": 1.4331,
|
| 53054 |
+
"step": 8736
|
| 53055 |
+
},
|
| 53056 |
+
{
|
| 53057 |
+
"epoch": 69.22,
|
| 53058 |
+
"learning_rate": 9.875088852988692e-06,
|
| 53059 |
+
"loss": 1.3974,
|
| 53060 |
+
"step": 8737
|
| 53061 |
+
},
|
| 53062 |
+
{
|
| 53063 |
+
"epoch": 69.23,
|
| 53064 |
+
"learning_rate": 9.875072697899839e-06,
|
| 53065 |
+
"loss": 1.3391,
|
| 53066 |
+
"step": 8738
|
| 53067 |
+
},
|
| 53068 |
+
{
|
| 53069 |
+
"epoch": 69.24,
|
| 53070 |
+
"learning_rate": 9.875056542810987e-06,
|
| 53071 |
+
"loss": 1.3186,
|
| 53072 |
+
"step": 8739
|
| 53073 |
+
},
|
| 53074 |
+
{
|
| 53075 |
+
"epoch": 69.25,
|
| 53076 |
+
"learning_rate": 9.875040387722134e-06,
|
| 53077 |
+
"loss": 1.5476,
|
| 53078 |
+
"step": 8740
|
| 53079 |
+
},
|
| 53080 |
+
{
|
| 53081 |
+
"epoch": 69.26,
|
| 53082 |
+
"learning_rate": 9.875024232633281e-06,
|
| 53083 |
+
"loss": 1.6061,
|
| 53084 |
+
"step": 8741
|
| 53085 |
+
},
|
| 53086 |
+
{
|
| 53087 |
+
"epoch": 69.26,
|
| 53088 |
+
"learning_rate": 9.875008077544426e-06,
|
| 53089 |
+
"loss": 1.5964,
|
| 53090 |
+
"step": 8742
|
| 53091 |
+
},
|
| 53092 |
+
{
|
| 53093 |
+
"epoch": 69.27,
|
| 53094 |
+
"learning_rate": 9.874991922455575e-06,
|
| 53095 |
+
"loss": 1.4022,
|
| 53096 |
+
"step": 8743
|
| 53097 |
+
},
|
| 53098 |
+
{
|
| 53099 |
+
"epoch": 69.28,
|
| 53100 |
+
"learning_rate": 9.874975767366721e-06,
|
| 53101 |
+
"loss": 1.6853,
|
| 53102 |
+
"step": 8744
|
| 53103 |
+
},
|
| 53104 |
+
{
|
| 53105 |
+
"epoch": 69.29,
|
| 53106 |
+
"learning_rate": 9.874959612277868e-06,
|
| 53107 |
+
"loss": 2.0233,
|
| 53108 |
+
"step": 8745
|
| 53109 |
+
},
|
| 53110 |
+
{
|
| 53111 |
+
"epoch": 69.3,
|
| 53112 |
+
"learning_rate": 9.874943457189015e-06,
|
| 53113 |
+
"loss": 1.4809,
|
| 53114 |
+
"step": 8746
|
| 53115 |
+
},
|
| 53116 |
+
{
|
| 53117 |
+
"epoch": 69.3,
|
| 53118 |
+
"learning_rate": 9.874927302100162e-06,
|
| 53119 |
+
"loss": 1.3434,
|
| 53120 |
+
"step": 8747
|
| 53121 |
+
},
|
| 53122 |
+
{
|
| 53123 |
+
"epoch": 69.31,
|
| 53124 |
+
"learning_rate": 9.874911147011309e-06,
|
| 53125 |
+
"loss": 1.5711,
|
| 53126 |
+
"step": 8748
|
| 53127 |
+
},
|
| 53128 |
+
{
|
| 53129 |
+
"epoch": 69.32,
|
| 53130 |
+
"learning_rate": 9.874894991922457e-06,
|
| 53131 |
+
"loss": 1.5846,
|
| 53132 |
+
"step": 8749
|
| 53133 |
+
},
|
| 53134 |
+
{
|
| 53135 |
+
"epoch": 69.33,
|
| 53136 |
+
"learning_rate": 9.874878836833604e-06,
|
| 53137 |
+
"loss": 1.5662,
|
| 53138 |
+
"step": 8750
|
| 53139 |
+
},
|
| 53140 |
+
{
|
| 53141 |
+
"epoch": 69.34,
|
| 53142 |
+
"learning_rate": 9.874862681744751e-06,
|
| 53143 |
+
"loss": 1.5346,
|
| 53144 |
+
"step": 8751
|
| 53145 |
+
},
|
| 53146 |
+
{
|
| 53147 |
+
"epoch": 69.34,
|
| 53148 |
+
"learning_rate": 9.874846526655898e-06,
|
| 53149 |
+
"loss": 1.5122,
|
| 53150 |
+
"step": 8752
|
| 53151 |
+
},
|
| 53152 |
+
{
|
| 53153 |
+
"epoch": 69.35,
|
| 53154 |
+
"learning_rate": 9.874830371567045e-06,
|
| 53155 |
+
"loss": 1.6172,
|
| 53156 |
+
"step": 8753
|
| 53157 |
+
},
|
| 53158 |
+
{
|
| 53159 |
+
"epoch": 69.36,
|
| 53160 |
+
"learning_rate": 9.874814216478191e-06,
|
| 53161 |
+
"loss": 1.6781,
|
| 53162 |
+
"step": 8754
|
| 53163 |
+
},
|
| 53164 |
+
{
|
| 53165 |
+
"epoch": 69.37,
|
| 53166 |
+
"learning_rate": 9.874798061389338e-06,
|
| 53167 |
+
"loss": 1.8096,
|
| 53168 |
+
"step": 8755
|
| 53169 |
+
},
|
| 53170 |
+
{
|
| 53171 |
+
"epoch": 69.38,
|
| 53172 |
+
"learning_rate": 9.874781906300485e-06,
|
| 53173 |
+
"loss": 2.1775,
|
| 53174 |
+
"step": 8756
|
| 53175 |
+
},
|
| 53176 |
+
{
|
| 53177 |
+
"epoch": 69.38,
|
| 53178 |
+
"learning_rate": 9.874765751211632e-06,
|
| 53179 |
+
"loss": 2.7198,
|
| 53180 |
+
"step": 8757
|
| 53181 |
+
},
|
| 53182 |
+
{
|
| 53183 |
+
"epoch": 69.39,
|
| 53184 |
+
"learning_rate": 9.874749596122779e-06,
|
| 53185 |
+
"loss": 2.4698,
|
| 53186 |
+
"step": 8758
|
| 53187 |
+
},
|
| 53188 |
+
{
|
| 53189 |
+
"epoch": 69.4,
|
| 53190 |
+
"learning_rate": 9.874733441033927e-06,
|
| 53191 |
+
"loss": 3.2068,
|
| 53192 |
+
"step": 8759
|
| 53193 |
+
},
|
| 53194 |
+
{
|
| 53195 |
+
"epoch": 69.41,
|
| 53196 |
+
"learning_rate": 9.874717285945074e-06,
|
| 53197 |
+
"loss": 1.5313,
|
| 53198 |
+
"step": 8760
|
| 53199 |
+
},
|
| 53200 |
+
{
|
| 53201 |
+
"epoch": 69.42,
|
| 53202 |
+
"learning_rate": 9.874701130856221e-06,
|
| 53203 |
+
"loss": 1.5581,
|
| 53204 |
+
"step": 8761
|
| 53205 |
+
},
|
| 53206 |
+
{
|
| 53207 |
+
"epoch": 69.42,
|
| 53208 |
+
"learning_rate": 9.874684975767368e-06,
|
| 53209 |
+
"loss": 1.4189,
|
| 53210 |
+
"step": 8762
|
| 53211 |
+
},
|
| 53212 |
+
{
|
| 53213 |
+
"epoch": 69.43,
|
| 53214 |
+
"learning_rate": 9.874668820678515e-06,
|
| 53215 |
+
"loss": 1.5631,
|
| 53216 |
+
"step": 8763
|
| 53217 |
+
},
|
| 53218 |
+
{
|
| 53219 |
+
"epoch": 69.44,
|
| 53220 |
+
"learning_rate": 9.874652665589661e-06,
|
| 53221 |
+
"loss": 1.3352,
|
| 53222 |
+
"step": 8764
|
| 53223 |
+
},
|
| 53224 |
+
{
|
| 53225 |
+
"epoch": 69.45,
|
| 53226 |
+
"learning_rate": 9.87463651050081e-06,
|
| 53227 |
+
"loss": 1.4554,
|
| 53228 |
+
"step": 8765
|
| 53229 |
+
},
|
| 53230 |
+
{
|
| 53231 |
+
"epoch": 69.46,
|
| 53232 |
+
"learning_rate": 9.874620355411955e-06,
|
| 53233 |
+
"loss": 1.3733,
|
| 53234 |
+
"step": 8766
|
| 53235 |
+
},
|
| 53236 |
+
{
|
| 53237 |
+
"epoch": 69.46,
|
| 53238 |
+
"learning_rate": 9.874604200323102e-06,
|
| 53239 |
+
"loss": 1.7026,
|
| 53240 |
+
"step": 8767
|
| 53241 |
+
},
|
| 53242 |
+
{
|
| 53243 |
+
"epoch": 69.47,
|
| 53244 |
+
"learning_rate": 9.874588045234249e-06,
|
| 53245 |
+
"loss": 1.5812,
|
| 53246 |
+
"step": 8768
|
| 53247 |
+
},
|
| 53248 |
+
{
|
| 53249 |
+
"epoch": 69.48,
|
| 53250 |
+
"learning_rate": 9.874571890145397e-06,
|
| 53251 |
+
"loss": 1.3614,
|
| 53252 |
+
"step": 8769
|
| 53253 |
+
},
|
| 53254 |
+
{
|
| 53255 |
+
"epoch": 69.49,
|
| 53256 |
+
"learning_rate": 9.874555735056544e-06,
|
| 53257 |
+
"loss": 1.3396,
|
| 53258 |
+
"step": 8770
|
| 53259 |
+
},
|
| 53260 |
+
{
|
| 53261 |
+
"epoch": 69.5,
|
| 53262 |
+
"learning_rate": 9.874539579967691e-06,
|
| 53263 |
+
"loss": 1.5401,
|
| 53264 |
+
"step": 8771
|
| 53265 |
+
},
|
| 53266 |
+
{
|
| 53267 |
+
"epoch": 69.5,
|
| 53268 |
+
"learning_rate": 9.874523424878838e-06,
|
| 53269 |
+
"loss": 1.438,
|
| 53270 |
+
"step": 8772
|
| 53271 |
+
},
|
| 53272 |
+
{
|
| 53273 |
+
"epoch": 69.51,
|
| 53274 |
+
"learning_rate": 9.874507269789985e-06,
|
| 53275 |
+
"loss": 1.4894,
|
| 53276 |
+
"step": 8773
|
| 53277 |
+
},
|
| 53278 |
+
{
|
| 53279 |
+
"epoch": 69.52,
|
| 53280 |
+
"learning_rate": 9.874491114701131e-06,
|
| 53281 |
+
"loss": 1.3303,
|
| 53282 |
+
"step": 8774
|
| 53283 |
+
},
|
| 53284 |
+
{
|
| 53285 |
+
"epoch": 69.53,
|
| 53286 |
+
"learning_rate": 9.87447495961228e-06,
|
| 53287 |
+
"loss": 1.3972,
|
| 53288 |
+
"step": 8775
|
| 53289 |
+
},
|
| 53290 |
+
{
|
| 53291 |
+
"epoch": 69.54,
|
| 53292 |
+
"learning_rate": 9.874458804523425e-06,
|
| 53293 |
+
"loss": 1.6936,
|
| 53294 |
+
"step": 8776
|
| 53295 |
+
},
|
| 53296 |
+
{
|
| 53297 |
+
"epoch": 69.54,
|
| 53298 |
+
"learning_rate": 9.874442649434572e-06,
|
| 53299 |
+
"loss": 1.5058,
|
| 53300 |
+
"step": 8777
|
| 53301 |
+
},
|
| 53302 |
+
{
|
| 53303 |
+
"epoch": 69.55,
|
| 53304 |
+
"learning_rate": 9.87442649434572e-06,
|
| 53305 |
+
"loss": 1.5953,
|
| 53306 |
+
"step": 8778
|
| 53307 |
+
},
|
| 53308 |
+
{
|
| 53309 |
+
"epoch": 69.56,
|
| 53310 |
+
"learning_rate": 9.874410339256867e-06,
|
| 53311 |
+
"loss": 1.7185,
|
| 53312 |
+
"step": 8779
|
| 53313 |
+
},
|
| 53314 |
+
{
|
| 53315 |
+
"epoch": 69.57,
|
| 53316 |
+
"learning_rate": 9.874394184168014e-06,
|
| 53317 |
+
"loss": 1.956,
|
| 53318 |
+
"step": 8780
|
| 53319 |
+
},
|
| 53320 |
+
{
|
| 53321 |
+
"epoch": 69.58,
|
| 53322 |
+
"learning_rate": 9.87437802907916e-06,
|
| 53323 |
+
"loss": 1.9566,
|
| 53324 |
+
"step": 8781
|
| 53325 |
+
},
|
| 53326 |
+
{
|
| 53327 |
+
"epoch": 69.58,
|
| 53328 |
+
"learning_rate": 9.874361873990308e-06,
|
| 53329 |
+
"loss": 2.2227,
|
| 53330 |
+
"step": 8782
|
| 53331 |
+
},
|
| 53332 |
+
{
|
| 53333 |
+
"epoch": 69.59,
|
| 53334 |
+
"learning_rate": 9.874345718901454e-06,
|
| 53335 |
+
"loss": 2.3693,
|
| 53336 |
+
"step": 8783
|
| 53337 |
+
},
|
| 53338 |
+
{
|
| 53339 |
+
"epoch": 69.6,
|
| 53340 |
+
"learning_rate": 9.874329563812601e-06,
|
| 53341 |
+
"loss": 2.9933,
|
| 53342 |
+
"step": 8784
|
| 53343 |
+
},
|
| 53344 |
+
{
|
| 53345 |
+
"epoch": 69.61,
|
| 53346 |
+
"learning_rate": 9.87431340872375e-06,
|
| 53347 |
+
"loss": 2.1501,
|
| 53348 |
+
"step": 8785
|
| 53349 |
+
},
|
| 53350 |
+
{
|
| 53351 |
+
"epoch": 69.62,
|
| 53352 |
+
"learning_rate": 9.874297253634895e-06,
|
| 53353 |
+
"loss": 2.1441,
|
| 53354 |
+
"step": 8786
|
| 53355 |
+
},
|
| 53356 |
+
{
|
| 53357 |
+
"epoch": 69.62,
|
| 53358 |
+
"learning_rate": 9.874281098546042e-06,
|
| 53359 |
+
"loss": 1.6236,
|
| 53360 |
+
"step": 8787
|
| 53361 |
+
},
|
| 53362 |
+
{
|
| 53363 |
+
"epoch": 69.63,
|
| 53364 |
+
"learning_rate": 9.87426494345719e-06,
|
| 53365 |
+
"loss": 1.4453,
|
| 53366 |
+
"step": 8788
|
| 53367 |
+
},
|
| 53368 |
+
{
|
| 53369 |
+
"epoch": 69.64,
|
| 53370 |
+
"learning_rate": 9.874248788368337e-06,
|
| 53371 |
+
"loss": 1.4271,
|
| 53372 |
+
"step": 8789
|
| 53373 |
+
},
|
| 53374 |
+
{
|
| 53375 |
+
"epoch": 69.65,
|
| 53376 |
+
"learning_rate": 9.874232633279484e-06,
|
| 53377 |
+
"loss": 2.0905,
|
| 53378 |
+
"step": 8790
|
| 53379 |
+
},
|
| 53380 |
+
{
|
| 53381 |
+
"epoch": 69.66,
|
| 53382 |
+
"learning_rate": 9.87421647819063e-06,
|
| 53383 |
+
"loss": 1.6809,
|
| 53384 |
+
"step": 8791
|
| 53385 |
+
},
|
| 53386 |
+
{
|
| 53387 |
+
"epoch": 69.66,
|
| 53388 |
+
"learning_rate": 9.874200323101778e-06,
|
| 53389 |
+
"loss": 2.1893,
|
| 53390 |
+
"step": 8792
|
| 53391 |
+
},
|
| 53392 |
+
{
|
| 53393 |
+
"epoch": 69.67,
|
| 53394 |
+
"learning_rate": 9.874184168012924e-06,
|
| 53395 |
+
"loss": 1.4405,
|
| 53396 |
+
"step": 8793
|
| 53397 |
+
},
|
| 53398 |
+
{
|
| 53399 |
+
"epoch": 69.68,
|
| 53400 |
+
"learning_rate": 9.874168012924071e-06,
|
| 53401 |
+
"loss": 1.4139,
|
| 53402 |
+
"step": 8794
|
| 53403 |
+
},
|
| 53404 |
+
{
|
| 53405 |
+
"epoch": 69.69,
|
| 53406 |
+
"learning_rate": 9.87415185783522e-06,
|
| 53407 |
+
"loss": 1.5473,
|
| 53408 |
+
"step": 8795
|
| 53409 |
+
},
|
| 53410 |
+
{
|
| 53411 |
+
"epoch": 69.7,
|
| 53412 |
+
"learning_rate": 9.874135702746367e-06,
|
| 53413 |
+
"loss": 1.4509,
|
| 53414 |
+
"step": 8796
|
| 53415 |
+
},
|
| 53416 |
+
{
|
| 53417 |
+
"epoch": 69.7,
|
| 53418 |
+
"learning_rate": 9.874119547657512e-06,
|
| 53419 |
+
"loss": 1.3682,
|
| 53420 |
+
"step": 8797
|
| 53421 |
+
},
|
| 53422 |
+
{
|
| 53423 |
+
"epoch": 69.71,
|
| 53424 |
+
"learning_rate": 9.87410339256866e-06,
|
| 53425 |
+
"loss": 1.3525,
|
| 53426 |
+
"step": 8798
|
| 53427 |
+
},
|
| 53428 |
+
{
|
| 53429 |
+
"epoch": 69.72,
|
| 53430 |
+
"learning_rate": 9.874087237479807e-06,
|
| 53431 |
+
"loss": 1.5427,
|
| 53432 |
+
"step": 8799
|
| 53433 |
+
},
|
| 53434 |
+
{
|
| 53435 |
+
"epoch": 69.73,
|
| 53436 |
+
"learning_rate": 9.874071082390954e-06,
|
| 53437 |
+
"loss": 1.4963,
|
| 53438 |
+
"step": 8800
|
| 53439 |
+
},
|
| 53440 |
+
{
|
| 53441 |
+
"epoch": 69.74,
|
| 53442 |
+
"learning_rate": 9.8740549273021e-06,
|
| 53443 |
+
"loss": 1.3988,
|
| 53444 |
+
"step": 8801
|
| 53445 |
+
},
|
| 53446 |
+
{
|
| 53447 |
+
"epoch": 69.74,
|
| 53448 |
+
"learning_rate": 9.874038772213248e-06,
|
| 53449 |
+
"loss": 1.8708,
|
| 53450 |
+
"step": 8802
|
| 53451 |
+
},
|
| 53452 |
+
{
|
| 53453 |
+
"epoch": 69.75,
|
| 53454 |
+
"learning_rate": 9.874022617124394e-06,
|
| 53455 |
+
"loss": 1.7412,
|
| 53456 |
+
"step": 8803
|
| 53457 |
+
},
|
| 53458 |
+
{
|
| 53459 |
+
"epoch": 69.76,
|
| 53460 |
+
"learning_rate": 9.874006462035543e-06,
|
| 53461 |
+
"loss": 2.0043,
|
| 53462 |
+
"step": 8804
|
| 53463 |
+
},
|
| 53464 |
+
{
|
| 53465 |
+
"epoch": 69.77,
|
| 53466 |
+
"learning_rate": 9.87399030694669e-06,
|
| 53467 |
+
"loss": 2.1015,
|
| 53468 |
+
"step": 8805
|
| 53469 |
+
},
|
| 53470 |
+
{
|
| 53471 |
+
"epoch": 69.78,
|
| 53472 |
+
"learning_rate": 9.873974151857837e-06,
|
| 53473 |
+
"loss": 2.205,
|
| 53474 |
+
"step": 8806
|
| 53475 |
+
},
|
| 53476 |
+
{
|
| 53477 |
+
"epoch": 69.78,
|
| 53478 |
+
"learning_rate": 9.873957996768983e-06,
|
| 53479 |
+
"loss": 2.1445,
|
| 53480 |
+
"step": 8807
|
| 53481 |
+
},
|
| 53482 |
+
{
|
| 53483 |
+
"epoch": 69.79,
|
| 53484 |
+
"learning_rate": 9.87394184168013e-06,
|
| 53485 |
+
"loss": 2.7113,
|
| 53486 |
+
"step": 8808
|
| 53487 |
+
},
|
| 53488 |
+
{
|
| 53489 |
+
"epoch": 69.8,
|
| 53490 |
+
"learning_rate": 9.873925686591277e-06,
|
| 53491 |
+
"loss": 3.3649,
|
| 53492 |
+
"step": 8809
|
| 53493 |
+
},
|
| 53494 |
+
{
|
| 53495 |
+
"epoch": 69.81,
|
| 53496 |
+
"learning_rate": 9.873909531502424e-06,
|
| 53497 |
+
"loss": 1.4795,
|
| 53498 |
+
"step": 8810
|
| 53499 |
+
},
|
| 53500 |
+
{
|
| 53501 |
+
"epoch": 69.82,
|
| 53502 |
+
"learning_rate": 9.87389337641357e-06,
|
| 53503 |
+
"loss": 1.5267,
|
| 53504 |
+
"step": 8811
|
| 53505 |
+
},
|
| 53506 |
+
{
|
| 53507 |
+
"epoch": 69.82,
|
| 53508 |
+
"learning_rate": 9.873877221324718e-06,
|
| 53509 |
+
"loss": 1.5875,
|
| 53510 |
+
"step": 8812
|
| 53511 |
+
},
|
| 53512 |
+
{
|
| 53513 |
+
"epoch": 69.83,
|
| 53514 |
+
"learning_rate": 9.873861066235864e-06,
|
| 53515 |
+
"loss": 1.5205,
|
| 53516 |
+
"step": 8813
|
| 53517 |
+
},
|
| 53518 |
+
{
|
| 53519 |
+
"epoch": 69.84,
|
| 53520 |
+
"learning_rate": 9.873844911147013e-06,
|
| 53521 |
+
"loss": 1.3915,
|
| 53522 |
+
"step": 8814
|
| 53523 |
+
},
|
| 53524 |
+
{
|
| 53525 |
+
"epoch": 69.85,
|
| 53526 |
+
"learning_rate": 9.87382875605816e-06,
|
| 53527 |
+
"loss": 1.5903,
|
| 53528 |
+
"step": 8815
|
| 53529 |
+
},
|
| 53530 |
+
{
|
| 53531 |
+
"epoch": 69.86,
|
| 53532 |
+
"learning_rate": 9.873812600969307e-06,
|
| 53533 |
+
"loss": 1.4766,
|
| 53534 |
+
"step": 8816
|
| 53535 |
+
},
|
| 53536 |
+
{
|
| 53537 |
+
"epoch": 69.86,
|
| 53538 |
+
"learning_rate": 9.873796445880453e-06,
|
| 53539 |
+
"loss": 2.2292,
|
| 53540 |
+
"step": 8817
|
| 53541 |
+
},
|
| 53542 |
+
{
|
| 53543 |
+
"epoch": 69.87,
|
| 53544 |
+
"learning_rate": 9.8737802907916e-06,
|
| 53545 |
+
"loss": 1.6554,
|
| 53546 |
+
"step": 8818
|
| 53547 |
+
},
|
| 53548 |
+
{
|
| 53549 |
+
"epoch": 69.88,
|
| 53550 |
+
"learning_rate": 9.873764135702747e-06,
|
| 53551 |
+
"loss": 1.3882,
|
| 53552 |
+
"step": 8819
|
| 53553 |
+
},
|
| 53554 |
+
{
|
| 53555 |
+
"epoch": 69.89,
|
| 53556 |
+
"learning_rate": 9.873747980613894e-06,
|
| 53557 |
+
"loss": 1.5054,
|
| 53558 |
+
"step": 8820
|
| 53559 |
+
},
|
| 53560 |
+
{
|
| 53561 |
+
"epoch": 69.9,
|
| 53562 |
+
"learning_rate": 9.87373182552504e-06,
|
| 53563 |
+
"loss": 1.6373,
|
| 53564 |
+
"step": 8821
|
| 53565 |
+
},
|
| 53566 |
+
{
|
| 53567 |
+
"epoch": 69.9,
|
| 53568 |
+
"learning_rate": 9.873715670436188e-06,
|
| 53569 |
+
"loss": 1.5243,
|
| 53570 |
+
"step": 8822
|
| 53571 |
+
},
|
| 53572 |
+
{
|
| 53573 |
+
"epoch": 69.91,
|
| 53574 |
+
"learning_rate": 9.873699515347334e-06,
|
| 53575 |
+
"loss": 2.1341,
|
| 53576 |
+
"step": 8823
|
| 53577 |
+
},
|
| 53578 |
+
{
|
| 53579 |
+
"epoch": 69.92,
|
| 53580 |
+
"learning_rate": 9.873683360258483e-06,
|
| 53581 |
+
"loss": 1.317,
|
| 53582 |
+
"step": 8824
|
| 53583 |
+
},
|
| 53584 |
+
{
|
| 53585 |
+
"epoch": 69.93,
|
| 53586 |
+
"learning_rate": 9.87366720516963e-06,
|
| 53587 |
+
"loss": 1.7215,
|
| 53588 |
+
"step": 8825
|
| 53589 |
+
},
|
| 53590 |
+
{
|
| 53591 |
+
"epoch": 69.94,
|
| 53592 |
+
"learning_rate": 9.873651050080777e-06,
|
| 53593 |
+
"loss": 1.3331,
|
| 53594 |
+
"step": 8826
|
| 53595 |
+
},
|
| 53596 |
+
{
|
| 53597 |
+
"epoch": 69.94,
|
| 53598 |
+
"learning_rate": 9.873634894991923e-06,
|
| 53599 |
+
"loss": 1.6016,
|
| 53600 |
+
"step": 8827
|
| 53601 |
+
},
|
| 53602 |
+
{
|
| 53603 |
+
"epoch": 69.95,
|
| 53604 |
+
"learning_rate": 9.87361873990307e-06,
|
| 53605 |
+
"loss": 1.5569,
|
| 53606 |
+
"step": 8828
|
| 53607 |
+
},
|
| 53608 |
+
{
|
| 53609 |
+
"epoch": 69.96,
|
| 53610 |
+
"learning_rate": 9.873602584814217e-06,
|
| 53611 |
+
"loss": 2.2356,
|
| 53612 |
+
"step": 8829
|
| 53613 |
+
},
|
| 53614 |
+
{
|
| 53615 |
+
"epoch": 69.97,
|
| 53616 |
+
"learning_rate": 9.873586429725366e-06,
|
| 53617 |
+
"loss": 2.2153,
|
| 53618 |
+
"step": 8830
|
| 53619 |
+
},
|
| 53620 |
+
{
|
| 53621 |
+
"epoch": 69.98,
|
| 53622 |
+
"learning_rate": 9.87357027463651e-06,
|
| 53623 |
+
"loss": 1.9975,
|
| 53624 |
+
"step": 8831
|
| 53625 |
+
},
|
| 53626 |
+
{
|
| 53627 |
+
"epoch": 69.98,
|
| 53628 |
+
"learning_rate": 9.873554119547658e-06,
|
| 53629 |
+
"loss": 2.2298,
|
| 53630 |
+
"step": 8832
|
| 53631 |
+
},
|
| 53632 |
+
{
|
| 53633 |
+
"epoch": 69.99,
|
| 53634 |
+
"learning_rate": 9.873537964458806e-06,
|
| 53635 |
+
"loss": 2.9414,
|
| 53636 |
+
"step": 8833
|
| 53637 |
+
},
|
| 53638 |
+
{
|
| 53639 |
+
"epoch": 70.0,
|
| 53640 |
+
"learning_rate": 9.873521809369953e-06,
|
| 53641 |
+
"loss": 3.4378,
|
| 53642 |
+
"step": 8834
|
| 53643 |
+
},
|
| 53644 |
+
{
|
| 53645 |
+
"epoch": 70.0,
|
| 53646 |
+
"eval_loss": 1.3710267543792725,
|
| 53647 |
+
"eval_runtime": 43.6624,
|
| 53648 |
+
"eval_samples_per_second": 19.147,
|
| 53649 |
+
"eval_steps_per_second": 0.618,
|
| 53650 |
+
"eval_wer": 0.7103508263264714,
|
| 53651 |
+
"step": 8834
|
| 53652 |
+
},
|
| 53653 |
+
{
|
| 53654 |
+
"epoch": 71.01,
|
| 53655 |
+
"learning_rate": 9.8735056542811e-06,
|
| 53656 |
+
"loss": 1.5553,
|
| 53657 |
+
"step": 8835
|
| 53658 |
+
},
|
| 53659 |
+
{
|
| 53660 |
+
"epoch": 71.02,
|
| 53661 |
+
"learning_rate": 9.873489499192247e-06,
|
| 53662 |
+
"loss": 1.4501,
|
| 53663 |
+
"step": 8836
|
| 53664 |
+
},
|
| 53665 |
+
{
|
| 53666 |
+
"epoch": 71.02,
|
| 53667 |
+
"learning_rate": 9.873473344103393e-06,
|
| 53668 |
+
"loss": 1.349,
|
| 53669 |
+
"step": 8837
|
| 53670 |
+
},
|
| 53671 |
+
{
|
| 53672 |
+
"epoch": 71.03,
|
| 53673 |
+
"learning_rate": 9.87345718901454e-06,
|
| 53674 |
+
"loss": 1.436,
|
| 53675 |
+
"step": 8838
|
| 53676 |
+
},
|
| 53677 |
+
{
|
| 53678 |
+
"epoch": 71.04,
|
| 53679 |
+
"learning_rate": 9.873441033925687e-06,
|
| 53680 |
+
"loss": 1.3992,
|
| 53681 |
+
"step": 8839
|
| 53682 |
+
},
|
| 53683 |
+
{
|
| 53684 |
+
"epoch": 71.05,
|
| 53685 |
+
"learning_rate": 9.873424878836836e-06,
|
| 53686 |
+
"loss": 1.6298,
|
| 53687 |
+
"step": 8840
|
| 53688 |
+
},
|
| 53689 |
+
{
|
| 53690 |
+
"epoch": 71.06,
|
| 53691 |
+
"learning_rate": 9.87340872374798e-06,
|
| 53692 |
+
"loss": 1.4596,
|
| 53693 |
+
"step": 8841
|
| 53694 |
+
},
|
| 53695 |
+
{
|
| 53696 |
+
"epoch": 71.06,
|
| 53697 |
+
"learning_rate": 9.873392568659128e-06,
|
| 53698 |
+
"loss": 1.2296,
|
| 53699 |
+
"step": 8842
|
| 53700 |
+
},
|
| 53701 |
+
{
|
| 53702 |
+
"epoch": 71.07,
|
| 53703 |
+
"learning_rate": 9.873376413570276e-06,
|
| 53704 |
+
"loss": 1.6262,
|
| 53705 |
+
"step": 8843
|
| 53706 |
+
},
|
| 53707 |
+
{
|
| 53708 |
+
"epoch": 71.08,
|
| 53709 |
+
"learning_rate": 9.873360258481423e-06,
|
| 53710 |
+
"loss": 1.5551,
|
| 53711 |
+
"step": 8844
|
| 53712 |
+
},
|
| 53713 |
+
{
|
| 53714 |
+
"epoch": 71.09,
|
| 53715 |
+
"learning_rate": 9.87334410339257e-06,
|
| 53716 |
+
"loss": 1.3272,
|
| 53717 |
+
"step": 8845
|
| 53718 |
+
},
|
| 53719 |
+
{
|
| 53720 |
+
"epoch": 71.1,
|
| 53721 |
+
"learning_rate": 9.873327948303717e-06,
|
| 53722 |
+
"loss": 1.3513,
|
| 53723 |
+
"step": 8846
|
| 53724 |
+
},
|
| 53725 |
+
{
|
| 53726 |
+
"epoch": 71.1,
|
| 53727 |
+
"learning_rate": 9.873311793214863e-06,
|
| 53728 |
+
"loss": 1.6325,
|
| 53729 |
+
"step": 8847
|
| 53730 |
+
},
|
| 53731 |
+
{
|
| 53732 |
+
"epoch": 71.11,
|
| 53733 |
+
"learning_rate": 9.87329563812601e-06,
|
| 53734 |
+
"loss": 1.406,
|
| 53735 |
+
"step": 8848
|
| 53736 |
+
},
|
| 53737 |
+
{
|
| 53738 |
+
"epoch": 71.12,
|
| 53739 |
+
"learning_rate": 9.873279483037157e-06,
|
| 53740 |
+
"loss": 2.1379,
|
| 53741 |
+
"step": 8849
|
| 53742 |
+
},
|
| 53743 |
+
{
|
| 53744 |
+
"epoch": 71.13,
|
| 53745 |
+
"learning_rate": 9.873263327948306e-06,
|
| 53746 |
+
"loss": 1.3738,
|
| 53747 |
+
"step": 8850
|
| 53748 |
+
},
|
| 53749 |
+
{
|
| 53750 |
+
"epoch": 71.14,
|
| 53751 |
+
"learning_rate": 9.87324717285945e-06,
|
| 53752 |
+
"loss": 1.5013,
|
| 53753 |
+
"step": 8851
|
| 53754 |
+
},
|
| 53755 |
+
{
|
| 53756 |
+
"epoch": 71.15,
|
| 53757 |
+
"learning_rate": 9.873231017770597e-06,
|
| 53758 |
+
"loss": 1.7028,
|
| 53759 |
+
"step": 8852
|
| 53760 |
+
},
|
| 53761 |
+
{
|
| 53762 |
+
"epoch": 71.15,
|
| 53763 |
+
"learning_rate": 9.873214862681746e-06,
|
| 53764 |
+
"loss": 1.5341,
|
| 53765 |
+
"step": 8853
|
| 53766 |
+
},
|
| 53767 |
+
{
|
| 53768 |
+
"epoch": 71.16,
|
| 53769 |
+
"learning_rate": 9.873198707592893e-06,
|
| 53770 |
+
"loss": 1.6287,
|
| 53771 |
+
"step": 8854
|
| 53772 |
+
},
|
| 53773 |
+
{
|
| 53774 |
+
"epoch": 71.17,
|
| 53775 |
+
"learning_rate": 9.87318255250404e-06,
|
| 53776 |
+
"loss": 1.9904,
|
| 53777 |
+
"step": 8855
|
| 53778 |
+
},
|
| 53779 |
+
{
|
| 53780 |
+
"epoch": 71.18,
|
| 53781 |
+
"learning_rate": 9.873166397415186e-06,
|
| 53782 |
+
"loss": 2.2276,
|
| 53783 |
+
"step": 8856
|
| 53784 |
+
},
|
| 53785 |
+
{
|
| 53786 |
+
"epoch": 71.19,
|
| 53787 |
+
"learning_rate": 9.873150242326333e-06,
|
| 53788 |
+
"loss": 2.1806,
|
| 53789 |
+
"step": 8857
|
| 53790 |
+
},
|
| 53791 |
+
{
|
| 53792 |
+
"epoch": 71.19,
|
| 53793 |
+
"learning_rate": 9.87313408723748e-06,
|
| 53794 |
+
"loss": 2.4198,
|
| 53795 |
+
"step": 8858
|
| 53796 |
+
},
|
| 53797 |
+
{
|
| 53798 |
+
"epoch": 71.2,
|
| 53799 |
+
"learning_rate": 9.873117932148629e-06,
|
| 53800 |
+
"loss": 3.2983,
|
| 53801 |
+
"step": 8859
|
| 53802 |
+
},
|
| 53803 |
+
{
|
| 53804 |
+
"epoch": 71.21,
|
| 53805 |
+
"learning_rate": 9.873101777059775e-06,
|
| 53806 |
+
"loss": 1.4535,
|
| 53807 |
+
"step": 8860
|
| 53808 |
+
},
|
| 53809 |
+
{
|
| 53810 |
+
"epoch": 71.22,
|
| 53811 |
+
"learning_rate": 9.87308562197092e-06,
|
| 53812 |
+
"loss": 1.6163,
|
| 53813 |
+
"step": 8861
|
| 53814 |
+
},
|
| 53815 |
+
{
|
| 53816 |
+
"epoch": 71.23,
|
| 53817 |
+
"learning_rate": 9.873069466882067e-06,
|
| 53818 |
+
"loss": 1.4652,
|
| 53819 |
+
"step": 8862
|
| 53820 |
+
},
|
| 53821 |
+
{
|
| 53822 |
+
"epoch": 71.23,
|
| 53823 |
+
"learning_rate": 9.873053311793216e-06,
|
| 53824 |
+
"loss": 1.4324,
|
| 53825 |
+
"step": 8863
|
| 53826 |
+
},
|
| 53827 |
+
{
|
| 53828 |
+
"epoch": 71.24,
|
| 53829 |
+
"learning_rate": 9.873037156704363e-06,
|
| 53830 |
+
"loss": 1.5285,
|
| 53831 |
+
"step": 8864
|
| 53832 |
+
},
|
| 53833 |
+
{
|
| 53834 |
+
"epoch": 71.25,
|
| 53835 |
+
"learning_rate": 9.87302100161551e-06,
|
| 53836 |
+
"loss": 1.655,
|
| 53837 |
+
"step": 8865
|
| 53838 |
+
},
|
| 53839 |
+
{
|
| 53840 |
+
"epoch": 71.26,
|
| 53841 |
+
"learning_rate": 9.873004846526656e-06,
|
| 53842 |
+
"loss": 1.5131,
|
| 53843 |
+
"step": 8866
|
| 53844 |
+
},
|
| 53845 |
+
{
|
| 53846 |
+
"epoch": 71.27,
|
| 53847 |
+
"learning_rate": 9.872988691437803e-06,
|
| 53848 |
+
"loss": 1.7416,
|
| 53849 |
+
"step": 8867
|
| 53850 |
+
},
|
| 53851 |
+
{
|
| 53852 |
+
"epoch": 71.27,
|
| 53853 |
+
"learning_rate": 9.87297253634895e-06,
|
| 53854 |
+
"loss": 1.2547,
|
| 53855 |
+
"step": 8868
|
| 53856 |
+
},
|
| 53857 |
+
{
|
| 53858 |
+
"epoch": 71.28,
|
| 53859 |
+
"learning_rate": 9.872956381260099e-06,
|
| 53860 |
+
"loss": 1.4199,
|
| 53861 |
+
"step": 8869
|
| 53862 |
+
},
|
| 53863 |
+
{
|
| 53864 |
+
"epoch": 71.29,
|
| 53865 |
+
"learning_rate": 9.872940226171245e-06,
|
| 53866 |
+
"loss": 1.3792,
|
| 53867 |
+
"step": 8870
|
| 53868 |
+
},
|
| 53869 |
+
{
|
| 53870 |
+
"epoch": 71.3,
|
| 53871 |
+
"learning_rate": 9.872924071082392e-06,
|
| 53872 |
+
"loss": 1.3612,
|
| 53873 |
+
"step": 8871
|
| 53874 |
+
},
|
| 53875 |
+
{
|
| 53876 |
+
"epoch": 71.31,
|
| 53877 |
+
"learning_rate": 9.872907915993539e-06,
|
| 53878 |
+
"loss": 1.8577,
|
| 53879 |
+
"step": 8872
|
| 53880 |
+
},
|
| 53881 |
+
{
|
| 53882 |
+
"epoch": 71.31,
|
| 53883 |
+
"learning_rate": 9.872891760904686e-06,
|
| 53884 |
+
"loss": 2.244,
|
| 53885 |
+
"step": 8873
|
| 53886 |
+
},
|
| 53887 |
+
{
|
| 53888 |
+
"epoch": 71.32,
|
| 53889 |
+
"learning_rate": 9.872875605815833e-06,
|
+      "loss": 1.5285,
+      "step": 8874
+    },
+    { "epoch": 71.33, "learning_rate": 9.87285945072698e-06, "loss": 1.6464, "step": 8875 },
+    { "epoch": 71.34, "learning_rate": 9.872843295638126e-06, "loss": 1.7209, "step": 8876 },
+    { "epoch": 71.35, "learning_rate": 9.872827140549273e-06, "loss": 1.7093, "step": 8877 },
+    { "epoch": 71.35, "learning_rate": 9.87281098546042e-06, "loss": 1.6397, "step": 8878 },
+    { "epoch": 71.36, "learning_rate": 9.872794830371569e-06, "loss": 1.6429, "step": 8879 },
+    { "epoch": 71.37, "learning_rate": 9.872778675282715e-06, "loss": 1.744, "step": 8880 },
+    { "epoch": 71.38, "learning_rate": 9.872762520193862e-06, "loss": 1.9573, "step": 8881 },
+    { "epoch": 71.39, "learning_rate": 9.872746365105009e-06, "loss": 2.7426, "step": 8882 },
+    { "epoch": 71.4, "learning_rate": 9.872730210016156e-06, "loss": 2.7319, "step": 8883 },
+    { "epoch": 71.4, "learning_rate": 9.872714054927303e-06, "loss": 2.8469, "step": 8884 },
+    { "epoch": 71.41, "learning_rate": 9.87269789983845e-06, "loss": 1.6622, "step": 8885 },
+    { "epoch": 71.42, "learning_rate": 9.872681744749596e-06, "loss": 1.7123, "step": 8886 },
+    { "epoch": 71.43, "learning_rate": 9.872665589660743e-06, "loss": 1.2474, "step": 8887 },
+    { "epoch": 71.44, "learning_rate": 9.87264943457189e-06, "loss": 1.358, "step": 8888 },
+    { "epoch": 71.44, "learning_rate": 9.872633279483039e-06, "loss": 1.3671, "step": 8889 },
+    { "epoch": 71.45, "learning_rate": 9.872617124394185e-06, "loss": 1.3765, "step": 8890 },
+    { "epoch": 71.46, "learning_rate": 9.872600969305332e-06, "loss": 1.2602, "step": 8891 },
+    { "epoch": 71.47, "learning_rate": 9.872584814216479e-06, "loss": 1.3128, "step": 8892 },
+    { "epoch": 71.48, "learning_rate": 9.872568659127626e-06, "loss": 1.4784, "step": 8893 },
+    { "epoch": 71.48, "learning_rate": 9.872552504038773e-06, "loss": 1.2873, "step": 8894 },
+    { "epoch": 71.49, "learning_rate": 9.872536348949921e-06, "loss": 1.9668, "step": 8895 },
+    { "epoch": 71.5, "learning_rate": 9.872520193861066e-06, "loss": 1.5462, "step": 8896 },
+    { "epoch": 71.51, "learning_rate": 9.872504038772213e-06, "loss": 1.6144, "step": 8897 },
+    { "epoch": 71.52, "learning_rate": 9.872487883683362e-06, "loss": 1.3621, "step": 8898 },
+    { "epoch": 71.52, "learning_rate": 9.872471728594509e-06, "loss": 1.4176, "step": 8899 },
+    { "epoch": 71.53, "learning_rate": 9.872455573505655e-06, "loss": 1.3091, "step": 8900 },
+    { "epoch": 71.54, "learning_rate": 9.872439418416802e-06, "loss": 1.456, "step": 8901 },
+    { "epoch": 71.55, "learning_rate": 9.872423263327949e-06, "loss": 1.7665, "step": 8902 },
+    { "epoch": 71.56, "learning_rate": 9.872407108239096e-06, "loss": 2.1338, "step": 8903 },
+    { "epoch": 71.56, "learning_rate": 9.872390953150243e-06, "loss": 1.6296, "step": 8904 },
+    { "epoch": 71.57, "learning_rate": 9.872374798061391e-06, "loss": 1.8227, "step": 8905 },
+    { "epoch": 71.58, "learning_rate": 9.872358642972536e-06, "loss": 1.856, "step": 8906 },
+    { "epoch": 71.59, "learning_rate": 9.872342487883683e-06, "loss": 2.1192, "step": 8907 },
+    { "epoch": 71.6, "learning_rate": 9.872326332794832e-06, "loss": 2.9905, "step": 8908 },
+    { "epoch": 71.6, "learning_rate": 9.872310177705979e-06, "loss": 3.6145, "step": 8909 },
+    { "epoch": 71.61, "learning_rate": 9.872294022617125e-06, "loss": 1.8041, "step": 8910 },
+    { "epoch": 71.62, "learning_rate": 9.872277867528272e-06, "loss": 1.5373, "step": 8911 },
+    { "epoch": 71.63, "learning_rate": 9.872261712439419e-06, "loss": 1.3917, "step": 8912 },
+    { "epoch": 71.64, "learning_rate": 9.872245557350566e-06, "loss": 1.4048, "step": 8913 },
+    { "epoch": 71.65, "learning_rate": 9.872229402261714e-06, "loss": 1.5666, "step": 8914 },
+    { "epoch": 71.65, "learning_rate": 9.872213247172861e-06, "loss": 1.5705, "step": 8915 },
+    { "epoch": 71.66, "learning_rate": 9.872197092084006e-06, "loss": 1.6069, "step": 8916 },
+    { "epoch": 71.67, "learning_rate": 9.872180936995153e-06, "loss": 1.5663, "step": 8917 },
+    { "epoch": 71.68, "learning_rate": 9.872164781906302e-06, "loss": 1.3925, "step": 8918 },
+    { "epoch": 71.69, "learning_rate": 9.872148626817449e-06, "loss": 1.2773, "step": 8919 },
+    { "epoch": 71.69, "learning_rate": 9.872132471728595e-06, "loss": 1.5319, "step": 8920 },
+    { "epoch": 71.7, "learning_rate": 9.872116316639742e-06, "loss": 1.4748, "step": 8921 },
+    { "epoch": 71.71, "learning_rate": 9.872100161550889e-06, "loss": 1.4809, "step": 8922 },
+    { "epoch": 71.72, "learning_rate": 9.872084006462036e-06, "loss": 1.285, "step": 8923 },
+    { "epoch": 71.73, "learning_rate": 9.872067851373184e-06, "loss": 1.3257, "step": 8924 },
+    { "epoch": 71.73, "learning_rate": 9.872051696284331e-06, "loss": 1.6072, "step": 8925 },
+    { "epoch": 71.74, "learning_rate": 9.872035541195476e-06, "loss": 1.3755, "step": 8926 },
+    { "epoch": 71.75, "learning_rate": 9.872019386106625e-06, "loss": 1.5515, "step": 8927 },
+    { "epoch": 71.76, "learning_rate": 9.872003231017772e-06, "loss": 1.8547, "step": 8928 },
+    { "epoch": 71.77, "learning_rate": 9.871987075928918e-06, "loss": 1.7236, "step": 8929 },
+    { "epoch": 71.77, "learning_rate": 9.871970920840065e-06, "loss": 1.939, "step": 8930 },
+    { "epoch": 71.78, "learning_rate": 9.871954765751212e-06, "loss": 1.864, "step": 8931 },
+    { "epoch": 71.79, "learning_rate": 9.871938610662359e-06, "loss": 1.8672, "step": 8932 },
+    { "epoch": 71.8, "learning_rate": 9.871922455573506e-06, "loss": 2.3148, "step": 8933 },
+    { "epoch": 71.81, "learning_rate": 9.871906300484654e-06, "loss": 3.0736, "step": 8934 },
+    { "epoch": 71.81, "learning_rate": 9.871890145395801e-06, "loss": 1.783, "step": 8935 },
+    { "epoch": 71.82, "learning_rate": 9.871873990306948e-06, "loss": 1.6798, "step": 8936 },
+    { "epoch": 71.83, "learning_rate": 9.871857835218095e-06, "loss": 1.6412, "step": 8937 },
+    { "epoch": 71.84, "learning_rate": 9.871841680129242e-06, "loss": 1.474, "step": 8938 },
+    { "epoch": 71.85, "learning_rate": 9.871825525040388e-06, "loss": 1.45, "step": 8939 },
+    { "epoch": 71.85, "learning_rate": 9.871809369951535e-06, "loss": 1.3193, "step": 8940 },
+    { "epoch": 71.86, "learning_rate": 9.871793214862682e-06, "loss": 1.5135, "step": 8941 },
+    { "epoch": 71.87, "learning_rate": 9.871777059773829e-06, "loss": 1.2765, "step": 8942 },
+    { "epoch": 71.88, "learning_rate": 9.871760904684976e-06, "loss": 1.4524, "step": 8943 },
+    { "epoch": 71.89, "learning_rate": 9.871744749596124e-06, "loss": 2.1673, "step": 8944 },
+    { "epoch": 71.9, "learning_rate": 9.871728594507271e-06, "loss": 1.2339, "step": 8945 },
+    { "epoch": 71.9, "learning_rate": 9.871712439418418e-06, "loss": 1.7509, "step": 8946 },
+    { "epoch": 71.91, "learning_rate": 9.871696284329565e-06, "loss": 1.7138, "step": 8947 },
+    { "epoch": 71.92, "learning_rate": 9.871680129240712e-06, "loss": 1.4647, "step": 8948 },
+    { "epoch": 71.93, "learning_rate": 9.871663974151858e-06, "loss": 1.7137, "step": 8949 },
+    { "epoch": 71.94, "learning_rate": 9.871647819063005e-06, "loss": 1.337, "step": 8950 },
+    { "epoch": 71.94, "learning_rate": 9.871631663974152e-06, "loss": 1.5527, "step": 8951 },
+    { "epoch": 71.95, "learning_rate": 9.871615508885299e-06, "loss": 1.8695, "step": 8952 },
+    { "epoch": 71.96, "learning_rate": 9.871599353796447e-06, "loss": 1.7975, "step": 8953 },
+    { "epoch": 71.97, "learning_rate": 9.871583198707594e-06, "loss": 1.6534, "step": 8954 },
+    { "epoch": 71.98, "learning_rate": 9.871567043618741e-06, "loss": 1.8484, "step": 8955 },
+    { "epoch": 71.98, "learning_rate": 9.871550888529888e-06, "loss": 2.1141, "step": 8956 },
+    { "epoch": 71.99, "learning_rate": 9.871534733441035e-06, "loss": 2.2151, "step": 8957 },
+    { "epoch": 72.0, "learning_rate": 9.871518578352182e-06, "loss": 3.2212, "step": 8958 },
+    {
+      "epoch": 72.0,
+      "eval_loss": 1.2623388767242432,
+      "eval_runtime": 44.0571,
+      "eval_samples_per_second": 18.953,
+      "eval_steps_per_second": 0.613,
+      "eval_wer": 0.6824567855829349,
+      "step": 8958
     }
   ],
   "max_steps": 620000,
   "num_train_epochs": 5000,
+  "total_flos": 2.51807555223276e+19,
   "trial_name": null,
   "trial_params": null
 }
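These added entries follow the log_history format that the Hugging Face Trainer writes into trainer_state.json: each training record carries epoch, learning_rate, loss, and step, and each evaluation record carries the eval_* metrics for the same step. Two back-of-the-envelope observations from the values above (inferred, not stated anywhere in the commit): the learning rate shrinks by roughly 1.6e-11 per step, which over the remaining ~611,000 of the 620,000 budgeted steps would bring it to about zero, consistent with a linear decay schedule; and eval_runtime × eval_samples_per_second ≈ 44.06 × 18.95 ≈ 835, suggesting an evaluation set of roughly 835 samples. The sketch below is a hypothetical helper, not a file in this repo, and the checkpoint path is an assumption; it shows how such a file could be loaded to inspect the training and WER curves.

# Hypothetical helper (not part of this commit): read a checkpoint's
# trainer_state.json and pull out the training / evaluation curves.
import json

def load_curves(path="model-bin/finetune/base/checkpoint-8958/trainer_state.json"):
    with open(path, encoding="utf-8") as f:
        state = json.load(f)
    history = state["log_history"]
    # Training records have "loss"; evaluation records have "eval_wer".
    train = [(entry["step"], entry["loss"]) for entry in history if "loss" in entry]
    evals = [(entry["step"], entry["eval_wer"]) for entry in history if "eval_wer" in entry]
    return train, evals

if __name__ == "__main__":
    train, evals = load_curves()
    print(f"{len(train)} training points, last eval WER: {evals[-1][1]:.4f}")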
model-bin/finetune/base/{checkpoint-8212 → checkpoint-8958}/training_args.bin
RENAMED
File without changes
model-bin/finetune/base/{checkpoint-8336/scaler.pt → log/1629481571.7415848/events.out.tfevents.1629481571.2977154bd390.32087.9}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:67f17e14d8cf993f84d88b28619579c5abb3e0c20187e367c7928549f62e28a9
+size 4194
model-bin/finetune/base/{checkpoint-8336/rng_state.pth → log/events.out.tfevents.1629481571.2977154bd390.32087.8}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:3ce1f4c04e01e6683904fb61a0dba575d640fb8538ce171444466011b00fa888
+size 24078
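The two renamed files above, whose names follow the TensorBoard event-file pattern and now live under log/, are stored through Git LFS like the other binary artifacts in this commit: the repository itself only holds a three-line pointer (version, oid sha256, size), while the payload is uploaded to the LFS store separately. A minimal sketch of reading such a pointer (a hypothetical snippet, not code from this repo):

# Hypothetical snippet: parse a Git LFS pointer file into a dict,
# e.g. {"version": "https://git-lfs.github.com/spec/v1",
#       "oid": "sha256:3ce1f4c0...", "size": "24078"}.
def read_lfs_pointer(path):
    info = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            # Each pointer line is "key value", separated by a single space.
            key, _, value = line.strip().partition(" ")
            info[key] = value
    return info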