Check committed on
Commit
7c91348
Β·
1 Parent(s): b19ee7f

auto commit

Browse files
Files changed (28) hide show
  1. main.py +1 -1
  2. model-bin/finetune/base/checkpoint-8212/pytorch_model.bin +0 -3
  3. model-bin/finetune/base/checkpoint-8212/rng_state.pth +0 -3
  4. model-bin/finetune/base/checkpoint-8212/scaler.pt +0 -3
  5. model-bin/finetune/base/checkpoint-8212/trainer_state.json +0 -0
  6. model-bin/finetune/base/checkpoint-8336/config.json +0 -78
  7. model-bin/finetune/base/checkpoint-8336/optimizer.pt +0 -3
  8. model-bin/finetune/base/checkpoint-8336/preprocessor_config.json +0 -9
  9. model-bin/finetune/base/checkpoint-8336/scheduler.pt +0 -3
  10. model-bin/finetune/base/checkpoint-8336/trainer_state.json +0 -0
  11. model-bin/finetune/base/checkpoint-8336/training_args.bin +0 -3
  12. model-bin/finetune/base/checkpoint-8709/config.json +0 -78
  13. model-bin/finetune/base/checkpoint-8709/optimizer.pt +0 -3
  14. model-bin/finetune/base/checkpoint-8709/preprocessor_config.json +0 -9
  15. model-bin/finetune/base/checkpoint-8709/pytorch_model.bin +0 -3
  16. model-bin/finetune/base/checkpoint-8709/scheduler.pt +0 -3
  17. model-bin/finetune/base/checkpoint-8709/training_args.bin +0 -3
  18. model-bin/finetune/base/{checkpoint-8212 β†’ checkpoint-8958}/config.json +0 -0
  19. model-bin/finetune/base/{checkpoint-8212 β†’ checkpoint-8958}/optimizer.pt +1 -1
  20. model-bin/finetune/base/{checkpoint-8212 β†’ checkpoint-8958}/preprocessor_config.json +0 -0
  21. model-bin/finetune/base/{checkpoint-8336 β†’ checkpoint-8958}/pytorch_model.bin +1 -1
  22. model-bin/finetune/base/{checkpoint-8709 β†’ checkpoint-8958}/rng_state.pth +2 -2
  23. model-bin/finetune/base/{checkpoint-8709 β†’ checkpoint-8958}/scaler.pt +1 -1
  24. model-bin/finetune/base/{checkpoint-8212 β†’ checkpoint-8958}/scheduler.pt +1 -1
  25. model-bin/finetune/base/{checkpoint-8709 β†’ checkpoint-8958}/trainer_state.json +1517 -5
  26. model-bin/finetune/base/{checkpoint-8212 β†’ checkpoint-8958}/training_args.bin +0 -0
  27. model-bin/finetune/base/{checkpoint-8336/scaler.pt β†’ log/1629481571.7415848/events.out.tfevents.1629481571.2977154bd390.32087.9} +2 -2
  28. model-bin/finetune/base/{checkpoint-8336/rng_state.pth β†’ log/events.out.tfevents.1629481571.2977154bd390.32087.8} +2 -2
main.py CHANGED
@@ -89,7 +89,7 @@ def load_prepared_dataset(path, processor, cache_file_filter_name, cache_file_ma
89
  def commit_checkpoint():
90
  submit_commands = [
91
  'git add model-bin/finetune/base/*',
92
- 'git commit -m "auto commit"',
93
  'git push origin main'
94
  ]
95
  for command in submit_commands:
 
89
  def commit_checkpoint():
90
  submit_commands = [
91
  'git add model-bin/finetune/base/*',
92
+ 'git commit -m "auto-commit"',
93
  'git push origin main'
94
  ]
95
  for command in submit_commands:
model-bin/finetune/base/checkpoint-8212/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9ceae04a6996246ad6724a00462738dd9f08e397a51090cccaabc78fd64342b
3
- size 377909911
 
 
 
 
model-bin/finetune/base/checkpoint-8212/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:369a5523390db45490fad840928af02d3bc252ff7791aae167c93deca4d91b7f
3
- size 14503
 
 
 
 
model-bin/finetune/base/checkpoint-8212/scaler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce3caf9fa11cbc306f2acff0c04546dc0430f5ec115580be46b6feffffb1db78
3
- size 559
 
 
 
 
model-bin/finetune/base/checkpoint-8212/trainer_state.json DELETED
The diff for this file is too large to render. See raw diff
 
model-bin/finetune/base/checkpoint-8336/config.json DELETED
@@ -1,78 +0,0 @@
1
- {
2
- "_name_or_path": "./model-bin/pretrained/base",
3
- "activation_dropout": 0.1,
4
- "apply_spec_augment": true,
5
- "architectures": [
6
- "Wav2Vec2ForCTC"
7
- ],
8
- "attention_dropout": 0.1,
9
- "bos_token_id": 1,
10
- "codevector_dim": 256,
11
- "contrastive_logits_temperature": 0.1,
12
- "conv_bias": false,
13
- "conv_dim": [
14
- 512,
15
- 512,
16
- 512,
17
- 512,
18
- 512,
19
- 512,
20
- 512
21
- ],
22
- "conv_kernel": [
23
- 10,
24
- 3,
25
- 3,
26
- 3,
27
- 3,
28
- 2,
29
- 2
30
- ],
31
- "conv_stride": [
32
- 5,
33
- 2,
34
- 2,
35
- 2,
36
- 2,
37
- 2,
38
- 2
39
- ],
40
- "ctc_loss_reduction": "mean",
41
- "ctc_zero_infinity": false,
42
- "diversity_loss_weight": 0.1,
43
- "do_stable_layer_norm": false,
44
- "eos_token_id": 2,
45
- "feat_extract_activation": "gelu",
46
- "feat_extract_dropout": 0.0,
47
- "feat_extract_norm": "group",
48
- "feat_proj_dropout": 0.1,
49
- "feat_quantizer_dropout": 0.0,
50
- "final_dropout": 0.1,
51
- "gradient_checkpointing": true,
52
- "hidden_act": "gelu",
53
- "hidden_dropout": 0.1,
54
- "hidden_dropout_prob": 0.1,
55
- "hidden_size": 768,
56
- "initializer_range": 0.02,
57
- "intermediate_size": 3072,
58
- "layer_norm_eps": 1e-05,
59
- "layerdrop": 0.1,
60
- "mask_feature_length": 10,
61
- "mask_feature_prob": 0.0,
62
- "mask_time_length": 10,
63
- "mask_time_prob": 0.05,
64
- "model_type": "wav2vec2",
65
- "num_attention_heads": 12,
66
- "num_codevector_groups": 2,
67
- "num_codevectors_per_group": 320,
68
- "num_conv_pos_embedding_groups": 16,
69
- "num_conv_pos_embeddings": 128,
70
- "num_feat_extract_layers": 7,
71
- "num_hidden_layers": 12,
72
- "num_negatives": 100,
73
- "pad_token_id": 109,
74
- "proj_codevector_dim": 256,
75
- "torch_dtype": "float32",
76
- "transformers_version": "4.9.2",
77
- "vocab_size": 110
78
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model-bin/finetune/base/checkpoint-8336/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:66d052e5cd2897e521431ffac494367dd2fc0682d7655dcef9e944a91f3f9462
3
- size 722165009
 
 
 
 
model-bin/finetune/base/checkpoint-8336/preprocessor_config.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
- "feature_size": 1,
5
- "padding_side": "right",
6
- "padding_value": 0.0,
7
- "return_attention_mask": false,
8
- "sampling_rate": 16000
9
- }
 
 
 
 
 
 
 
 
 
 
model-bin/finetune/base/checkpoint-8336/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:52cdb53167833ea5ec421b4bc4792a7a88ca2114418372fb1cd508a5ebcca6b1
3
- size 623
 
 
 
 
model-bin/finetune/base/checkpoint-8336/trainer_state.json DELETED
The diff for this file is too large to render. See raw diff
 
model-bin/finetune/base/checkpoint-8336/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:53d5f6b64d43d7a5c22d92f1aa4114a0ed5474a4ee16170f4a83fcd9522f9c6e
3
- size 2671
 
 
 
 
model-bin/finetune/base/checkpoint-8709/config.json DELETED
@@ -1,78 +0,0 @@
1
- {
2
- "_name_or_path": "./model-bin/pretrained/base",
3
- "activation_dropout": 0.1,
4
- "apply_spec_augment": true,
5
- "architectures": [
6
- "Wav2Vec2ForCTC"
7
- ],
8
- "attention_dropout": 0.1,
9
- "bos_token_id": 1,
10
- "codevector_dim": 256,
11
- "contrastive_logits_temperature": 0.1,
12
- "conv_bias": false,
13
- "conv_dim": [
14
- 512,
15
- 512,
16
- 512,
17
- 512,
18
- 512,
19
- 512,
20
- 512
21
- ],
22
- "conv_kernel": [
23
- 10,
24
- 3,
25
- 3,
26
- 3,
27
- 3,
28
- 2,
29
- 2
30
- ],
31
- "conv_stride": [
32
- 5,
33
- 2,
34
- 2,
35
- 2,
36
- 2,
37
- 2,
38
- 2
39
- ],
40
- "ctc_loss_reduction": "mean",
41
- "ctc_zero_infinity": false,
42
- "diversity_loss_weight": 0.1,
43
- "do_stable_layer_norm": false,
44
- "eos_token_id": 2,
45
- "feat_extract_activation": "gelu",
46
- "feat_extract_dropout": 0.0,
47
- "feat_extract_norm": "group",
48
- "feat_proj_dropout": 0.1,
49
- "feat_quantizer_dropout": 0.0,
50
- "final_dropout": 0.1,
51
- "gradient_checkpointing": true,
52
- "hidden_act": "gelu",
53
- "hidden_dropout": 0.1,
54
- "hidden_dropout_prob": 0.1,
55
- "hidden_size": 768,
56
- "initializer_range": 0.02,
57
- "intermediate_size": 3072,
58
- "layer_norm_eps": 1e-05,
59
- "layerdrop": 0.1,
60
- "mask_feature_length": 10,
61
- "mask_feature_prob": 0.0,
62
- "mask_time_length": 10,
63
- "mask_time_prob": 0.05,
64
- "model_type": "wav2vec2",
65
- "num_attention_heads": 12,
66
- "num_codevector_groups": 2,
67
- "num_codevectors_per_group": 320,
68
- "num_conv_pos_embedding_groups": 16,
69
- "num_conv_pos_embeddings": 128,
70
- "num_feat_extract_layers": 7,
71
- "num_hidden_layers": 12,
72
- "num_negatives": 100,
73
- "pad_token_id": 109,
74
- "proj_codevector_dim": 256,
75
- "torch_dtype": "float32",
76
- "transformers_version": "4.9.2",
77
- "vocab_size": 110
78
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model-bin/finetune/base/checkpoint-8709/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:415b0246c463cd5ceb4041b8f28843eda1a8031b909e1738d3c2a92a64e7c7d6
3
- size 722165009
 
 
 
 
model-bin/finetune/base/checkpoint-8709/preprocessor_config.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
- "feature_size": 1,
5
- "padding_side": "right",
6
- "padding_value": 0.0,
7
- "return_attention_mask": false,
8
- "sampling_rate": 16000
9
- }
 
 
 
 
 
 
 
 
 
 
model-bin/finetune/base/checkpoint-8709/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c6dc6a2981c4ed21a7a862e57603e9e9f7c71d41e65b7c711ef7d511ef61fc8
3
- size 377909911
 
 
 
 
model-bin/finetune/base/checkpoint-8709/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:36c2d8351b8dc01af8da53704884d2363d46bcdd9e86d18e980d29bde05879ec
3
- size 623
 
 
 
 
model-bin/finetune/base/checkpoint-8709/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:53d5f6b64d43d7a5c22d92f1aa4114a0ed5474a4ee16170f4a83fcd9522f9c6e
3
- size 2671
 
 
 
 
model-bin/finetune/base/{checkpoint-8212 β†’ checkpoint-8958}/config.json RENAMED
File without changes
model-bin/finetune/base/{checkpoint-8212 β†’ checkpoint-8958}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f60be19033d40758a8fa0ddb172463d630c79e031795a839b90adbc6b4d9c2df
3
  size 722165009
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d66880966a57a8c87bdda1b410b4f3e68c34d3b8a424197028887abc7e150c55
3
  size 722165009
model-bin/finetune/base/{checkpoint-8212 β†’ checkpoint-8958}/preprocessor_config.json RENAMED
File without changes
model-bin/finetune/base/{checkpoint-8336 β†’ checkpoint-8958}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93ccdc196aff3c82e59b51b65baf9e79353a7c8c64d1acdd81f5ae176c169c93
3
  size 377909911
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c254f8ad282308ea33bb83c758ef8293899f150adc41b570833d7b458c78bfc
3
  size 377909911
model-bin/finetune/base/{checkpoint-8709 β†’ checkpoint-8958}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51a4880d340c77f40515f04f749e39abccbe6111e450e52ab35bc472d7f0a68c
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a91a6dcfed691871e13767ddc06caddb7fcdfd1c0a09919cf6d6f6999f8cf961
3
+ size 14567
model-bin/finetune/base/{checkpoint-8709 β†’ checkpoint-8958}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8159c1d1c63d3825e8a05cb5ddecb5b6c3e8a74bed03176fab5ab56815b0464
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13952a1cb28382e67abf3075b93cfb164b5d815d475d2f1573c4e3e812c1dba1
3
  size 559
model-bin/finetune/base/{checkpoint-8212 β†’ checkpoint-8958}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed8e00b421f4e21c7d4abfffcb970f5149329fc3025b9a68b3cac08248258d67
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfaf4fa12293f359b8f566ee48e8288f4f3121c30617ecd31186b7e3d795a959
3
  size 623
model-bin/finetune/base/{checkpoint-8709 β†’ checkpoint-8958}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.7469591821505858,
3
- "best_model_checkpoint": "./model-bin/finetune/base/checkpoint-8709",
4
- "epoch": 70.0,
5
- "global_step": 8709,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -52890,11 +52890,1523 @@
52890
  "eval_steps_per_second": 0.631,
52891
  "eval_wer": 0.7469591821505858,
52892
  "step": 8709
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52893
  }
52894
  ],
52895
  "max_steps": 620000,
52896
  "num_train_epochs": 5000,
52897
- "total_flos": 2.448174582026537e+19,
52898
  "trial_name": null,
52899
  "trial_params": null
52900
  }
 
1
  {
2
+ "best_metric": 0.6824567855829349,
3
+ "best_model_checkpoint": "./model-bin/finetune/base/checkpoint-8958",
4
+ "epoch": 72.0,
5
+ "global_step": 8958,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
52890
  "eval_steps_per_second": 0.631,
52891
  "eval_wer": 0.7469591821505858,
52892
  "step": 8709
52893
+ },
52894
+ {
52895
+ "epoch": 69.01,
52896
+ "learning_rate": 9.875525040387724e-06,
52897
+ "loss": 1.6153,
52898
+ "step": 8710
52899
+ },
52900
+ {
52901
+ "epoch": 69.02,
52902
+ "learning_rate": 9.87550888529887e-06,
52903
+ "loss": 1.4633,
52904
+ "step": 8711
52905
+ },
52906
+ {
52907
+ "epoch": 69.02,
52908
+ "learning_rate": 9.875492730210016e-06,
52909
+ "loss": 1.5134,
52910
+ "step": 8712
52911
+ },
52912
+ {
52913
+ "epoch": 69.03,
52914
+ "learning_rate": 9.875476575121163e-06,
52915
+ "loss": 1.5621,
52916
+ "step": 8713
52917
+ },
52918
+ {
52919
+ "epoch": 69.04,
52920
+ "learning_rate": 9.875460420032311e-06,
52921
+ "loss": 1.4003,
52922
+ "step": 8714
52923
+ },
52924
+ {
52925
+ "epoch": 69.05,
52926
+ "learning_rate": 9.875444264943458e-06,
52927
+ "loss": 1.54,
52928
+ "step": 8715
52929
+ },
52930
+ {
52931
+ "epoch": 69.06,
52932
+ "learning_rate": 9.875428109854605e-06,
52933
+ "loss": 1.3723,
52934
+ "step": 8716
52935
+ },
52936
+ {
52937
+ "epoch": 69.06,
52938
+ "learning_rate": 9.875411954765752e-06,
52939
+ "loss": 1.672,
52940
+ "step": 8717
52941
+ },
52942
+ {
52943
+ "epoch": 69.07,
52944
+ "learning_rate": 9.875395799676899e-06,
52945
+ "loss": 1.5243,
52946
+ "step": 8718
52947
+ },
52948
+ {
52949
+ "epoch": 69.08,
52950
+ "learning_rate": 9.875379644588046e-06,
52951
+ "loss": 1.593,
52952
+ "step": 8719
52953
+ },
52954
+ {
52955
+ "epoch": 69.09,
52956
+ "learning_rate": 9.875363489499194e-06,
52957
+ "loss": 1.3595,
52958
+ "step": 8720
52959
+ },
52960
+ {
52961
+ "epoch": 69.1,
52962
+ "learning_rate": 9.87534733441034e-06,
52963
+ "loss": 1.4388,
52964
+ "step": 8721
52965
+ },
52966
+ {
52967
+ "epoch": 69.1,
52968
+ "learning_rate": 9.875331179321486e-06,
52969
+ "loss": 1.3854,
52970
+ "step": 8722
52971
+ },
52972
+ {
52973
+ "epoch": 69.11,
52974
+ "learning_rate": 9.875315024232635e-06,
52975
+ "loss": 1.8071,
52976
+ "step": 8723
52977
+ },
52978
+ {
52979
+ "epoch": 69.12,
52980
+ "learning_rate": 9.875298869143781e-06,
52981
+ "loss": 1.5355,
52982
+ "step": 8724
52983
+ },
52984
+ {
52985
+ "epoch": 69.13,
52986
+ "learning_rate": 9.875282714054928e-06,
52987
+ "loss": 1.4477,
52988
+ "step": 8725
52989
+ },
52990
+ {
52991
+ "epoch": 69.14,
52992
+ "learning_rate": 9.875266558966075e-06,
52993
+ "loss": 1.5663,
52994
+ "step": 8726
52995
+ },
52996
+ {
52997
+ "epoch": 69.14,
52998
+ "learning_rate": 9.875250403877222e-06,
52999
+ "loss": 1.7814,
53000
+ "step": 8727
53001
+ },
53002
+ {
53003
+ "epoch": 69.15,
53004
+ "learning_rate": 9.875234248788369e-06,
53005
+ "loss": 1.4415,
53006
+ "step": 8728
53007
+ },
53008
+ {
53009
+ "epoch": 69.16,
53010
+ "learning_rate": 9.875218093699516e-06,
53011
+ "loss": 1.7214,
53012
+ "step": 8729
53013
+ },
53014
+ {
53015
+ "epoch": 69.17,
53016
+ "learning_rate": 9.875201938610664e-06,
53017
+ "loss": 1.8272,
53018
+ "step": 8730
53019
+ },
53020
+ {
53021
+ "epoch": 69.18,
53022
+ "learning_rate": 9.875185783521811e-06,
53023
+ "loss": 1.8333,
53024
+ "step": 8731
53025
+ },
53026
+ {
53027
+ "epoch": 69.18,
53028
+ "learning_rate": 9.875169628432956e-06,
53029
+ "loss": 2.2468,
53030
+ "step": 8732
53031
+ },
53032
+ {
53033
+ "epoch": 69.19,
53034
+ "learning_rate": 9.875153473344105e-06,
53035
+ "loss": 2.7862,
53036
+ "step": 8733
53037
+ },
53038
+ {
53039
+ "epoch": 69.2,
53040
+ "learning_rate": 9.875137318255251e-06,
53041
+ "loss": 3.3676,
53042
+ "step": 8734
53043
+ },
53044
+ {
53045
+ "epoch": 69.21,
53046
+ "learning_rate": 9.875121163166398e-06,
53047
+ "loss": 1.5527,
53048
+ "step": 8735
53049
+ },
53050
+ {
53051
+ "epoch": 69.22,
53052
+ "learning_rate": 9.875105008077545e-06,
53053
+ "loss": 1.4331,
53054
+ "step": 8736
53055
+ },
53056
+ {
53057
+ "epoch": 69.22,
53058
+ "learning_rate": 9.875088852988692e-06,
53059
+ "loss": 1.3974,
53060
+ "step": 8737
53061
+ },
53062
+ {
53063
+ "epoch": 69.23,
53064
+ "learning_rate": 9.875072697899839e-06,
53065
+ "loss": 1.3391,
53066
+ "step": 8738
53067
+ },
53068
+ {
53069
+ "epoch": 69.24,
53070
+ "learning_rate": 9.875056542810987e-06,
53071
+ "loss": 1.3186,
53072
+ "step": 8739
53073
+ },
53074
+ {
53075
+ "epoch": 69.25,
53076
+ "learning_rate": 9.875040387722134e-06,
53077
+ "loss": 1.5476,
53078
+ "step": 8740
53079
+ },
53080
+ {
53081
+ "epoch": 69.26,
53082
+ "learning_rate": 9.875024232633281e-06,
53083
+ "loss": 1.6061,
53084
+ "step": 8741
53085
+ },
53086
+ {
53087
+ "epoch": 69.26,
53088
+ "learning_rate": 9.875008077544426e-06,
53089
+ "loss": 1.5964,
53090
+ "step": 8742
53091
+ },
53092
+ {
53093
+ "epoch": 69.27,
53094
+ "learning_rate": 9.874991922455575e-06,
53095
+ "loss": 1.4022,
53096
+ "step": 8743
53097
+ },
53098
+ {
53099
+ "epoch": 69.28,
53100
+ "learning_rate": 9.874975767366721e-06,
53101
+ "loss": 1.6853,
53102
+ "step": 8744
53103
+ },
53104
+ {
53105
+ "epoch": 69.29,
53106
+ "learning_rate": 9.874959612277868e-06,
53107
+ "loss": 2.0233,
53108
+ "step": 8745
53109
+ },
53110
+ {
53111
+ "epoch": 69.3,
53112
+ "learning_rate": 9.874943457189015e-06,
53113
+ "loss": 1.4809,
53114
+ "step": 8746
53115
+ },
53116
+ {
53117
+ "epoch": 69.3,
53118
+ "learning_rate": 9.874927302100162e-06,
53119
+ "loss": 1.3434,
53120
+ "step": 8747
53121
+ },
53122
+ {
53123
+ "epoch": 69.31,
53124
+ "learning_rate": 9.874911147011309e-06,
53125
+ "loss": 1.5711,
53126
+ "step": 8748
53127
+ },
53128
+ {
53129
+ "epoch": 69.32,
53130
+ "learning_rate": 9.874894991922457e-06,
53131
+ "loss": 1.5846,
53132
+ "step": 8749
53133
+ },
53134
+ {
53135
+ "epoch": 69.33,
53136
+ "learning_rate": 9.874878836833604e-06,
53137
+ "loss": 1.5662,
53138
+ "step": 8750
53139
+ },
53140
+ {
53141
+ "epoch": 69.34,
53142
+ "learning_rate": 9.874862681744751e-06,
53143
+ "loss": 1.5346,
53144
+ "step": 8751
53145
+ },
53146
+ {
53147
+ "epoch": 69.34,
53148
+ "learning_rate": 9.874846526655898e-06,
53149
+ "loss": 1.5122,
53150
+ "step": 8752
53151
+ },
53152
+ {
53153
+ "epoch": 69.35,
53154
+ "learning_rate": 9.874830371567045e-06,
53155
+ "loss": 1.6172,
53156
+ "step": 8753
53157
+ },
53158
+ {
53159
+ "epoch": 69.36,
53160
+ "learning_rate": 9.874814216478191e-06,
53161
+ "loss": 1.6781,
53162
+ "step": 8754
53163
+ },
53164
+ {
53165
+ "epoch": 69.37,
53166
+ "learning_rate": 9.874798061389338e-06,
53167
+ "loss": 1.8096,
53168
+ "step": 8755
53169
+ },
53170
+ {
53171
+ "epoch": 69.38,
53172
+ "learning_rate": 9.874781906300485e-06,
53173
+ "loss": 2.1775,
53174
+ "step": 8756
53175
+ },
53176
+ {
53177
+ "epoch": 69.38,
53178
+ "learning_rate": 9.874765751211632e-06,
53179
+ "loss": 2.7198,
53180
+ "step": 8757
53181
+ },
53182
+ {
53183
+ "epoch": 69.39,
53184
+ "learning_rate": 9.874749596122779e-06,
53185
+ "loss": 2.4698,
53186
+ "step": 8758
53187
+ },
53188
+ {
53189
+ "epoch": 69.4,
53190
+ "learning_rate": 9.874733441033927e-06,
53191
+ "loss": 3.2068,
53192
+ "step": 8759
53193
+ },
53194
+ {
53195
+ "epoch": 69.41,
53196
+ "learning_rate": 9.874717285945074e-06,
53197
+ "loss": 1.5313,
53198
+ "step": 8760
53199
+ },
53200
+ {
53201
+ "epoch": 69.42,
53202
+ "learning_rate": 9.874701130856221e-06,
53203
+ "loss": 1.5581,
53204
+ "step": 8761
53205
+ },
53206
+ {
53207
+ "epoch": 69.42,
53208
+ "learning_rate": 9.874684975767368e-06,
53209
+ "loss": 1.4189,
53210
+ "step": 8762
53211
+ },
53212
+ {
53213
+ "epoch": 69.43,
53214
+ "learning_rate": 9.874668820678515e-06,
53215
+ "loss": 1.5631,
53216
+ "step": 8763
53217
+ },
53218
+ {
53219
+ "epoch": 69.44,
53220
+ "learning_rate": 9.874652665589661e-06,
53221
+ "loss": 1.3352,
53222
+ "step": 8764
53223
+ },
53224
+ {
53225
+ "epoch": 69.45,
53226
+ "learning_rate": 9.87463651050081e-06,
53227
+ "loss": 1.4554,
53228
+ "step": 8765
53229
+ },
53230
+ {
53231
+ "epoch": 69.46,
53232
+ "learning_rate": 9.874620355411955e-06,
53233
+ "loss": 1.3733,
53234
+ "step": 8766
53235
+ },
53236
+ {
53237
+ "epoch": 69.46,
53238
+ "learning_rate": 9.874604200323102e-06,
53239
+ "loss": 1.7026,
53240
+ "step": 8767
53241
+ },
53242
+ {
53243
+ "epoch": 69.47,
53244
+ "learning_rate": 9.874588045234249e-06,
53245
+ "loss": 1.5812,
53246
+ "step": 8768
53247
+ },
53248
+ {
53249
+ "epoch": 69.48,
53250
+ "learning_rate": 9.874571890145397e-06,
53251
+ "loss": 1.3614,
53252
+ "step": 8769
53253
+ },
53254
+ {
53255
+ "epoch": 69.49,
53256
+ "learning_rate": 9.874555735056544e-06,
53257
+ "loss": 1.3396,
53258
+ "step": 8770
53259
+ },
53260
+ {
53261
+ "epoch": 69.5,
53262
+ "learning_rate": 9.874539579967691e-06,
53263
+ "loss": 1.5401,
53264
+ "step": 8771
53265
+ },
53266
+ {
53267
+ "epoch": 69.5,
53268
+ "learning_rate": 9.874523424878838e-06,
53269
+ "loss": 1.438,
53270
+ "step": 8772
53271
+ },
53272
+ {
53273
+ "epoch": 69.51,
53274
+ "learning_rate": 9.874507269789985e-06,
53275
+ "loss": 1.4894,
53276
+ "step": 8773
53277
+ },
53278
+ {
53279
+ "epoch": 69.52,
53280
+ "learning_rate": 9.874491114701131e-06,
53281
+ "loss": 1.3303,
53282
+ "step": 8774
53283
+ },
53284
+ {
53285
+ "epoch": 69.53,
53286
+ "learning_rate": 9.87447495961228e-06,
53287
+ "loss": 1.3972,
53288
+ "step": 8775
53289
+ },
53290
+ {
53291
+ "epoch": 69.54,
53292
+ "learning_rate": 9.874458804523425e-06,
53293
+ "loss": 1.6936,
53294
+ "step": 8776
53295
+ },
53296
+ {
53297
+ "epoch": 69.54,
53298
+ "learning_rate": 9.874442649434572e-06,
53299
+ "loss": 1.5058,
53300
+ "step": 8777
53301
+ },
53302
+ {
53303
+ "epoch": 69.55,
53304
+ "learning_rate": 9.87442649434572e-06,
53305
+ "loss": 1.5953,
53306
+ "step": 8778
53307
+ },
53308
+ {
53309
+ "epoch": 69.56,
53310
+ "learning_rate": 9.874410339256867e-06,
53311
+ "loss": 1.7185,
53312
+ "step": 8779
53313
+ },
53314
+ {
53315
+ "epoch": 69.57,
53316
+ "learning_rate": 9.874394184168014e-06,
53317
+ "loss": 1.956,
53318
+ "step": 8780
53319
+ },
53320
+ {
53321
+ "epoch": 69.58,
53322
+ "learning_rate": 9.87437802907916e-06,
53323
+ "loss": 1.9566,
53324
+ "step": 8781
53325
+ },
53326
+ {
53327
+ "epoch": 69.58,
53328
+ "learning_rate": 9.874361873990308e-06,
53329
+ "loss": 2.2227,
53330
+ "step": 8782
53331
+ },
53332
+ {
53333
+ "epoch": 69.59,
53334
+ "learning_rate": 9.874345718901454e-06,
53335
+ "loss": 2.3693,
53336
+ "step": 8783
53337
+ },
53338
+ {
53339
+ "epoch": 69.6,
53340
+ "learning_rate": 9.874329563812601e-06,
53341
+ "loss": 2.9933,
53342
+ "step": 8784
53343
+ },
53344
+ {
53345
+ "epoch": 69.61,
53346
+ "learning_rate": 9.87431340872375e-06,
53347
+ "loss": 2.1501,
53348
+ "step": 8785
53349
+ },
53350
+ {
53351
+ "epoch": 69.62,
53352
+ "learning_rate": 9.874297253634895e-06,
53353
+ "loss": 2.1441,
53354
+ "step": 8786
53355
+ },
53356
+ {
53357
+ "epoch": 69.62,
53358
+ "learning_rate": 9.874281098546042e-06,
53359
+ "loss": 1.6236,
53360
+ "step": 8787
53361
+ },
53362
+ {
53363
+ "epoch": 69.63,
53364
+ "learning_rate": 9.87426494345719e-06,
53365
+ "loss": 1.4453,
53366
+ "step": 8788
53367
+ },
53368
+ {
53369
+ "epoch": 69.64,
53370
+ "learning_rate": 9.874248788368337e-06,
53371
+ "loss": 1.4271,
53372
+ "step": 8789
53373
+ },
53374
+ {
53375
+ "epoch": 69.65,
53376
+ "learning_rate": 9.874232633279484e-06,
53377
+ "loss": 2.0905,
53378
+ "step": 8790
53379
+ },
53380
+ {
53381
+ "epoch": 69.66,
53382
+ "learning_rate": 9.87421647819063e-06,
53383
+ "loss": 1.6809,
53384
+ "step": 8791
53385
+ },
53386
+ {
53387
+ "epoch": 69.66,
53388
+ "learning_rate": 9.874200323101778e-06,
53389
+ "loss": 2.1893,
53390
+ "step": 8792
53391
+ },
53392
+ {
53393
+ "epoch": 69.67,
53394
+ "learning_rate": 9.874184168012924e-06,
53395
+ "loss": 1.4405,
53396
+ "step": 8793
53397
+ },
53398
+ {
53399
+ "epoch": 69.68,
53400
+ "learning_rate": 9.874168012924071e-06,
53401
+ "loss": 1.4139,
53402
+ "step": 8794
53403
+ },
53404
+ {
53405
+ "epoch": 69.69,
53406
+ "learning_rate": 9.87415185783522e-06,
53407
+ "loss": 1.5473,
53408
+ "step": 8795
53409
+ },
53410
+ {
53411
+ "epoch": 69.7,
53412
+ "learning_rate": 9.874135702746367e-06,
53413
+ "loss": 1.4509,
53414
+ "step": 8796
53415
+ },
53416
+ {
53417
+ "epoch": 69.7,
53418
+ "learning_rate": 9.874119547657512e-06,
53419
+ "loss": 1.3682,
53420
+ "step": 8797
53421
+ },
53422
+ {
53423
+ "epoch": 69.71,
53424
+ "learning_rate": 9.87410339256866e-06,
53425
+ "loss": 1.3525,
53426
+ "step": 8798
53427
+ },
53428
+ {
53429
+ "epoch": 69.72,
53430
+ "learning_rate": 9.874087237479807e-06,
53431
+ "loss": 1.5427,
53432
+ "step": 8799
53433
+ },
53434
+ {
53435
+ "epoch": 69.73,
53436
+ "learning_rate": 9.874071082390954e-06,
53437
+ "loss": 1.4963,
53438
+ "step": 8800
53439
+ },
53440
+ {
53441
+ "epoch": 69.74,
53442
+ "learning_rate": 9.8740549273021e-06,
53443
+ "loss": 1.3988,
53444
+ "step": 8801
53445
+ },
53446
+ {
53447
+ "epoch": 69.74,
53448
+ "learning_rate": 9.874038772213248e-06,
53449
+ "loss": 1.8708,
53450
+ "step": 8802
53451
+ },
53452
+ {
53453
+ "epoch": 69.75,
53454
+ "learning_rate": 9.874022617124394e-06,
53455
+ "loss": 1.7412,
53456
+ "step": 8803
53457
+ },
53458
+ {
53459
+ "epoch": 69.76,
53460
+ "learning_rate": 9.874006462035543e-06,
53461
+ "loss": 2.0043,
53462
+ "step": 8804
53463
+ },
53464
+ {
53465
+ "epoch": 69.77,
53466
+ "learning_rate": 9.87399030694669e-06,
53467
+ "loss": 2.1015,
53468
+ "step": 8805
53469
+ },
53470
+ {
53471
+ "epoch": 69.78,
53472
+ "learning_rate": 9.873974151857837e-06,
53473
+ "loss": 2.205,
53474
+ "step": 8806
53475
+ },
53476
+ {
53477
+ "epoch": 69.78,
53478
+ "learning_rate": 9.873957996768983e-06,
53479
+ "loss": 2.1445,
53480
+ "step": 8807
53481
+ },
53482
+ {
53483
+ "epoch": 69.79,
53484
+ "learning_rate": 9.87394184168013e-06,
53485
+ "loss": 2.7113,
53486
+ "step": 8808
53487
+ },
53488
+ {
53489
+ "epoch": 69.8,
53490
+ "learning_rate": 9.873925686591277e-06,
53491
+ "loss": 3.3649,
53492
+ "step": 8809
53493
+ },
53494
+ {
53495
+ "epoch": 69.81,
53496
+ "learning_rate": 9.873909531502424e-06,
53497
+ "loss": 1.4795,
53498
+ "step": 8810
53499
+ },
53500
+ {
53501
+ "epoch": 69.82,
53502
+ "learning_rate": 9.87389337641357e-06,
53503
+ "loss": 1.5267,
53504
+ "step": 8811
53505
+ },
53506
+ {
53507
+ "epoch": 69.82,
53508
+ "learning_rate": 9.873877221324718e-06,
53509
+ "loss": 1.5875,
53510
+ "step": 8812
53511
+ },
53512
+ {
53513
+ "epoch": 69.83,
53514
+ "learning_rate": 9.873861066235864e-06,
53515
+ "loss": 1.5205,
53516
+ "step": 8813
53517
+ },
53518
+ {
53519
+ "epoch": 69.84,
53520
+ "learning_rate": 9.873844911147013e-06,
53521
+ "loss": 1.3915,
53522
+ "step": 8814
53523
+ },
53524
+ {
53525
+ "epoch": 69.85,
53526
+ "learning_rate": 9.87382875605816e-06,
53527
+ "loss": 1.5903,
53528
+ "step": 8815
53529
+ },
53530
+ {
53531
+ "epoch": 69.86,
53532
+ "learning_rate": 9.873812600969307e-06,
53533
+ "loss": 1.4766,
53534
+ "step": 8816
53535
+ },
53536
+ {
53537
+ "epoch": 69.86,
53538
+ "learning_rate": 9.873796445880453e-06,
53539
+ "loss": 2.2292,
53540
+ "step": 8817
53541
+ },
53542
+ {
53543
+ "epoch": 69.87,
53544
+ "learning_rate": 9.8737802907916e-06,
53545
+ "loss": 1.6554,
53546
+ "step": 8818
53547
+ },
53548
+ {
53549
+ "epoch": 69.88,
53550
+ "learning_rate": 9.873764135702747e-06,
53551
+ "loss": 1.3882,
53552
+ "step": 8819
53553
+ },
53554
+ {
53555
+ "epoch": 69.89,
53556
+ "learning_rate": 9.873747980613894e-06,
53557
+ "loss": 1.5054,
53558
+ "step": 8820
53559
+ },
53560
+ {
53561
+ "epoch": 69.9,
53562
+ "learning_rate": 9.87373182552504e-06,
53563
+ "loss": 1.6373,
53564
+ "step": 8821
53565
+ },
53566
+ {
53567
+ "epoch": 69.9,
53568
+ "learning_rate": 9.873715670436188e-06,
53569
+ "loss": 1.5243,
53570
+ "step": 8822
53571
+ },
53572
+ {
53573
+ "epoch": 69.91,
53574
+ "learning_rate": 9.873699515347334e-06,
53575
+ "loss": 2.1341,
53576
+ "step": 8823
53577
+ },
53578
+ {
53579
+ "epoch": 69.92,
53580
+ "learning_rate": 9.873683360258483e-06,
53581
+ "loss": 1.317,
53582
+ "step": 8824
53583
+ },
53584
+ {
53585
+ "epoch": 69.93,
53586
+ "learning_rate": 9.87366720516963e-06,
53587
+ "loss": 1.7215,
53588
+ "step": 8825
53589
+ },
53590
+ {
53591
+ "epoch": 69.94,
53592
+ "learning_rate": 9.873651050080777e-06,
53593
+ "loss": 1.3331,
53594
+ "step": 8826
53595
+ },
53596
+ {
53597
+ "epoch": 69.94,
53598
+ "learning_rate": 9.873634894991923e-06,
53599
+ "loss": 1.6016,
53600
+ "step": 8827
53601
+ },
53602
+ {
53603
+ "epoch": 69.95,
53604
+ "learning_rate": 9.87361873990307e-06,
53605
+ "loss": 1.5569,
53606
+ "step": 8828
53607
+ },
53608
+ {
53609
+ "epoch": 69.96,
53610
+ "learning_rate": 9.873602584814217e-06,
53611
+ "loss": 2.2356,
53612
+ "step": 8829
53613
+ },
53614
+ {
53615
+ "epoch": 69.97,
53616
+ "learning_rate": 9.873586429725366e-06,
53617
+ "loss": 2.2153,
53618
+ "step": 8830
53619
+ },
53620
+ {
53621
+ "epoch": 69.98,
53622
+ "learning_rate": 9.87357027463651e-06,
53623
+ "loss": 1.9975,
53624
+ "step": 8831
53625
+ },
53626
+ {
53627
+ "epoch": 69.98,
53628
+ "learning_rate": 9.873554119547658e-06,
53629
+ "loss": 2.2298,
53630
+ "step": 8832
53631
+ },
53632
+ {
53633
+ "epoch": 69.99,
53634
+ "learning_rate": 9.873537964458806e-06,
53635
+ "loss": 2.9414,
53636
+ "step": 8833
53637
+ },
53638
+ {
53639
+ "epoch": 70.0,
53640
+ "learning_rate": 9.873521809369953e-06,
53641
+ "loss": 3.4378,
53642
+ "step": 8834
53643
+ },
53644
+ {
53645
+ "epoch": 70.0,
53646
+ "eval_loss": 1.3710267543792725,
53647
+ "eval_runtime": 43.6624,
53648
+ "eval_samples_per_second": 19.147,
53649
+ "eval_steps_per_second": 0.618,
53650
+ "eval_wer": 0.7103508263264714,
53651
+ "step": 8834
53652
+ },
53653
+ {
53654
+ "epoch": 71.01,
53655
+ "learning_rate": 9.8735056542811e-06,
53656
+ "loss": 1.5553,
53657
+ "step": 8835
53658
+ },
53659
+ {
53660
+ "epoch": 71.02,
53661
+ "learning_rate": 9.873489499192247e-06,
53662
+ "loss": 1.4501,
53663
+ "step": 8836
53664
+ },
53665
+ {
53666
+ "epoch": 71.02,
53667
+ "learning_rate": 9.873473344103393e-06,
53668
+ "loss": 1.349,
53669
+ "step": 8837
53670
+ },
53671
+ {
53672
+ "epoch": 71.03,
53673
+ "learning_rate": 9.87345718901454e-06,
53674
+ "loss": 1.436,
53675
+ "step": 8838
53676
+ },
53677
+ {
53678
+ "epoch": 71.04,
53679
+ "learning_rate": 9.873441033925687e-06,
53680
+ "loss": 1.3992,
53681
+ "step": 8839
53682
+ },
53683
+ {
53684
+ "epoch": 71.05,
53685
+ "learning_rate": 9.873424878836836e-06,
53686
+ "loss": 1.6298,
53687
+ "step": 8840
53688
+ },
53689
+ {
53690
+ "epoch": 71.06,
53691
+ "learning_rate": 9.87340872374798e-06,
53692
+ "loss": 1.4596,
53693
+ "step": 8841
53694
+ },
53695
+ {
53696
+ "epoch": 71.06,
53697
+ "learning_rate": 9.873392568659128e-06,
53698
+ "loss": 1.2296,
53699
+ "step": 8842
53700
+ },
53701
+ {
53702
+ "epoch": 71.07,
53703
+ "learning_rate": 9.873376413570276e-06,
53704
+ "loss": 1.6262,
53705
+ "step": 8843
53706
+ },
53707
+ {
53708
+ "epoch": 71.08,
53709
+ "learning_rate": 9.873360258481423e-06,
53710
+ "loss": 1.5551,
53711
+ "step": 8844
53712
+ },
53713
+ {
53714
+ "epoch": 71.09,
53715
+ "learning_rate": 9.87334410339257e-06,
53716
+ "loss": 1.3272,
53717
+ "step": 8845
53718
+ },
53719
+ {
53720
+ "epoch": 71.1,
53721
+ "learning_rate": 9.873327948303717e-06,
53722
+ "loss": 1.3513,
53723
+ "step": 8846
53724
+ },
53725
+ {
53726
+ "epoch": 71.1,
53727
+ "learning_rate": 9.873311793214863e-06,
53728
+ "loss": 1.6325,
53729
+ "step": 8847
53730
+ },
53731
+ {
53732
+ "epoch": 71.11,
53733
+ "learning_rate": 9.87329563812601e-06,
53734
+ "loss": 1.406,
53735
+ "step": 8848
53736
+ },
53737
+ {
53738
+ "epoch": 71.12,
53739
+ "learning_rate": 9.873279483037157e-06,
53740
+ "loss": 2.1379,
53741
+ "step": 8849
53742
+ },
53743
+ {
53744
+ "epoch": 71.13,
53745
+ "learning_rate": 9.873263327948306e-06,
53746
+ "loss": 1.3738,
53747
+ "step": 8850
53748
+ },
53749
+ {
53750
+ "epoch": 71.14,
53751
+ "learning_rate": 9.87324717285945e-06,
53752
+ "loss": 1.5013,
53753
+ "step": 8851
53754
+ },
53755
+ {
53756
+ "epoch": 71.15,
53757
+ "learning_rate": 9.873231017770597e-06,
53758
+ "loss": 1.7028,
53759
+ "step": 8852
53760
+ },
53761
+ {
53762
+ "epoch": 71.15,
53763
+ "learning_rate": 9.873214862681746e-06,
53764
+ "loss": 1.5341,
53765
+ "step": 8853
53766
+ },
53767
+ {
53768
+ "epoch": 71.16,
53769
+ "learning_rate": 9.873198707592893e-06,
53770
+ "loss": 1.6287,
53771
+ "step": 8854
53772
+ },
53773
+ {
53774
+ "epoch": 71.17,
53775
+ "learning_rate": 9.87318255250404e-06,
53776
+ "loss": 1.9904,
53777
+ "step": 8855
53778
+ },
53779
+ {
53780
+ "epoch": 71.18,
53781
+ "learning_rate": 9.873166397415186e-06,
53782
+ "loss": 2.2276,
53783
+ "step": 8856
53784
+ },
53785
+ {
53786
+ "epoch": 71.19,
53787
+ "learning_rate": 9.873150242326333e-06,
53788
+ "loss": 2.1806,
53789
+ "step": 8857
53790
+ },
53791
+ {
53792
+ "epoch": 71.19,
53793
+ "learning_rate": 9.87313408723748e-06,
53794
+ "loss": 2.4198,
53795
+ "step": 8858
53796
+ },
53797
+ {
53798
+ "epoch": 71.2,
53799
+ "learning_rate": 9.873117932148629e-06,
53800
+ "loss": 3.2983,
53801
+ "step": 8859
53802
+ },
53803
+ {
53804
+ "epoch": 71.21,
53805
+ "learning_rate": 9.873101777059775e-06,
53806
+ "loss": 1.4535,
53807
+ "step": 8860
53808
+ },
53809
+ {
53810
+ "epoch": 71.22,
53811
+ "learning_rate": 9.87308562197092e-06,
53812
+ "loss": 1.6163,
53813
+ "step": 8861
53814
+ },
53815
+ {
53816
+ "epoch": 71.23,
53817
+ "learning_rate": 9.873069466882067e-06,
53818
+ "loss": 1.4652,
53819
+ "step": 8862
53820
+ },
53821
+ {
53822
+ "epoch": 71.23,
53823
+ "learning_rate": 9.873053311793216e-06,
53824
+ "loss": 1.4324,
53825
+ "step": 8863
53826
+ },
53827
+ {
53828
+ "epoch": 71.24,
53829
+ "learning_rate": 9.873037156704363e-06,
53830
+ "loss": 1.5285,
53831
+ "step": 8864
53832
+ },
53833
+ {
53834
+ "epoch": 71.25,
53835
+ "learning_rate": 9.87302100161551e-06,
53836
+ "loss": 1.655,
53837
+ "step": 8865
53838
+ },
53839
+ {
53840
+ "epoch": 71.26,
53841
+ "learning_rate": 9.873004846526656e-06,
53842
+ "loss": 1.5131,
53843
+ "step": 8866
53844
+ },
53845
+ {
53846
+ "epoch": 71.27,
53847
+ "learning_rate": 9.872988691437803e-06,
53848
+ "loss": 1.7416,
53849
+ "step": 8867
53850
+ },
53851
+ {
53852
+ "epoch": 71.27,
53853
+ "learning_rate": 9.87297253634895e-06,
53854
+ "loss": 1.2547,
53855
+ "step": 8868
53856
+ },
53857
+ {
53858
+ "epoch": 71.28,
53859
+ "learning_rate": 9.872956381260099e-06,
53860
+ "loss": 1.4199,
53861
+ "step": 8869
53862
+ },
53863
+ {
53864
+ "epoch": 71.29,
53865
+ "learning_rate": 9.872940226171245e-06,
53866
+ "loss": 1.3792,
53867
+ "step": 8870
53868
+ },
53869
+ {
53870
+ "epoch": 71.3,
53871
+ "learning_rate": 9.872924071082392e-06,
53872
+ "loss": 1.3612,
53873
+ "step": 8871
53874
+ },
53875
+ {
53876
+ "epoch": 71.31,
53877
+ "learning_rate": 9.872907915993539e-06,
53878
+ "loss": 1.8577,
53879
+ "step": 8872
53880
+ },
53881
+ {
53882
+ "epoch": 71.31,
53883
+ "learning_rate": 9.872891760904686e-06,
53884
+ "loss": 2.244,
53885
+ "step": 8873
53886
+ },
53887
+ {
53888
+ "epoch": 71.32,
53889
+ "learning_rate": 9.872875605815833e-06,
53890
+ "loss": 1.5285,
53891
+ "step": 8874
53892
+ },
53893
+ {
53894
+ "epoch": 71.33,
53895
+ "learning_rate": 9.87285945072698e-06,
53896
+ "loss": 1.6464,
53897
+ "step": 8875
53898
+ },
53899
+ {
53900
+ "epoch": 71.34,
53901
+ "learning_rate": 9.872843295638126e-06,
53902
+ "loss": 1.7209,
53903
+ "step": 8876
53904
+ },
53905
+ {
53906
+ "epoch": 71.35,
53907
+ "learning_rate": 9.872827140549273e-06,
53908
+ "loss": 1.7093,
53909
+ "step": 8877
53910
+ },
53911
+ {
53912
+ "epoch": 71.35,
53913
+ "learning_rate": 9.87281098546042e-06,
53914
+ "loss": 1.6397,
53915
+ "step": 8878
53916
+ },
53917
+ {
53918
+ "epoch": 71.36,
53919
+ "learning_rate": 9.872794830371569e-06,
53920
+ "loss": 1.6429,
53921
+ "step": 8879
53922
+ },
53923
+ {
53924
+ "epoch": 71.37,
53925
+ "learning_rate": 9.872778675282715e-06,
53926
+ "loss": 1.744,
53927
+ "step": 8880
53928
+ },
53929
+ {
53930
+ "epoch": 71.38,
53931
+ "learning_rate": 9.872762520193862e-06,
53932
+ "loss": 1.9573,
53933
+ "step": 8881
53934
+ },
53935
+ {
53936
+ "epoch": 71.39,
53937
+ "learning_rate": 9.872746365105009e-06,
53938
+ "loss": 2.7426,
53939
+ "step": 8882
53940
+ },
53941
+ {
53942
+ "epoch": 71.4,
53943
+ "learning_rate": 9.872730210016156e-06,
53944
+ "loss": 2.7319,
53945
+ "step": 8883
53946
+ },
53947
+ {
53948
+ "epoch": 71.4,
53949
+ "learning_rate": 9.872714054927303e-06,
53950
+ "loss": 2.8469,
53951
+ "step": 8884
53952
+ },
53953
+ {
53954
+ "epoch": 71.41,
53955
+ "learning_rate": 9.87269789983845e-06,
53956
+ "loss": 1.6622,
53957
+ "step": 8885
53958
+ },
53959
+ {
53960
+ "epoch": 71.42,
53961
+ "learning_rate": 9.872681744749596e-06,
53962
+ "loss": 1.7123,
53963
+ "step": 8886
53964
+ },
53965
+ {
53966
+ "epoch": 71.43,
53967
+ "learning_rate": 9.872665589660743e-06,
53968
+ "loss": 1.2474,
53969
+ "step": 8887
53970
+ },
53971
+ {
53972
+ "epoch": 71.44,
53973
+ "learning_rate": 9.87264943457189e-06,
53974
+ "loss": 1.358,
53975
+ "step": 8888
53976
+ },
53977
+ {
53978
+ "epoch": 71.44,
53979
+ "learning_rate": 9.872633279483039e-06,
53980
+ "loss": 1.3671,
53981
+ "step": 8889
53982
+ },
53983
+ {
53984
+ "epoch": 71.45,
53985
+ "learning_rate": 9.872617124394185e-06,
53986
+ "loss": 1.3765,
53987
+ "step": 8890
53988
+ },
53989
+ {
53990
+ "epoch": 71.46,
53991
+ "learning_rate": 9.872600969305332e-06,
53992
+ "loss": 1.2602,
53993
+ "step": 8891
53994
+ },
53995
+ {
53996
+ "epoch": 71.47,
53997
+ "learning_rate": 9.872584814216479e-06,
53998
+ "loss": 1.3128,
53999
+ "step": 8892
54000
+ },
54001
+ {
54002
+ "epoch": 71.48,
54003
+ "learning_rate": 9.872568659127626e-06,
54004
+ "loss": 1.4784,
54005
+ "step": 8893
54006
+ },
54007
+ {
54008
+ "epoch": 71.48,
54009
+ "learning_rate": 9.872552504038773e-06,
54010
+ "loss": 1.2873,
54011
+ "step": 8894
54012
+ },
54013
+ {
54014
+ "epoch": 71.49,
54015
+ "learning_rate": 9.872536348949921e-06,
54016
+ "loss": 1.9668,
54017
+ "step": 8895
54018
+ },
54019
+ {
54020
+ "epoch": 71.5,
54021
+ "learning_rate": 9.872520193861066e-06,
54022
+ "loss": 1.5462,
54023
+ "step": 8896
54024
+ },
54025
+ {
54026
+ "epoch": 71.51,
54027
+ "learning_rate": 9.872504038772213e-06,
54028
+ "loss": 1.6144,
54029
+ "step": 8897
54030
+ },
54031
+ {
54032
+ "epoch": 71.52,
54033
+ "learning_rate": 9.872487883683362e-06,
54034
+ "loss": 1.3621,
54035
+ "step": 8898
54036
+ },
54037
+ {
54038
+ "epoch": 71.52,
54039
+ "learning_rate": 9.872471728594509e-06,
54040
+ "loss": 1.4176,
54041
+ "step": 8899
54042
+ },
54043
+ {
54044
+ "epoch": 71.53,
54045
+ "learning_rate": 9.872455573505655e-06,
54046
+ "loss": 1.3091,
54047
+ "step": 8900
54048
+ },
54049
+ {
54050
+ "epoch": 71.54,
54051
+ "learning_rate": 9.872439418416802e-06,
54052
+ "loss": 1.456,
54053
+ "step": 8901
54054
+ },
54055
+ {
54056
+ "epoch": 71.55,
54057
+ "learning_rate": 9.872423263327949e-06,
54058
+ "loss": 1.7665,
54059
+ "step": 8902
54060
+ },
54061
+ {
54062
+ "epoch": 71.56,
54063
+ "learning_rate": 9.872407108239096e-06,
54064
+ "loss": 2.1338,
54065
+ "step": 8903
54066
+ },
54067
+ {
54068
+ "epoch": 71.56,
54069
+ "learning_rate": 9.872390953150243e-06,
54070
+ "loss": 1.6296,
54071
+ "step": 8904
54072
+ },
54073
+ {
54074
+ "epoch": 71.57,
54075
+ "learning_rate": 9.872374798061391e-06,
54076
+ "loss": 1.8227,
54077
+ "step": 8905
54078
+ },
54079
+ {
54080
+ "epoch": 71.58,
54081
+ "learning_rate": 9.872358642972536e-06,
54082
+ "loss": 1.856,
54083
+ "step": 8906
54084
+ },
54085
+ {
54086
+ "epoch": 71.59,
54087
+ "learning_rate": 9.872342487883683e-06,
54088
+ "loss": 2.1192,
54089
+ "step": 8907
54090
+ },
54091
+ {
54092
+ "epoch": 71.6,
54093
+ "learning_rate": 9.872326332794832e-06,
54094
+ "loss": 2.9905,
54095
+ "step": 8908
54096
+ },
54097
+ {
54098
+ "epoch": 71.6,
54099
+ "learning_rate": 9.872310177705979e-06,
54100
+ "loss": 3.6145,
54101
+ "step": 8909
54102
+ },
54103
+ {
54104
+ "epoch": 71.61,
54105
+ "learning_rate": 9.872294022617125e-06,
54106
+ "loss": 1.8041,
54107
+ "step": 8910
54108
+ },
54109
+ {
54110
+ "epoch": 71.62,
54111
+ "learning_rate": 9.872277867528272e-06,
54112
+ "loss": 1.5373,
54113
+ "step": 8911
54114
+ },
54115
+ {
54116
+ "epoch": 71.63,
54117
+ "learning_rate": 9.872261712439419e-06,
54118
+ "loss": 1.3917,
54119
+ "step": 8912
54120
+ },
54121
+ {
54122
+ "epoch": 71.64,
54123
+ "learning_rate": 9.872245557350566e-06,
54124
+ "loss": 1.4048,
54125
+ "step": 8913
54126
+ },
54127
+ {
54128
+ "epoch": 71.65,
54129
+ "learning_rate": 9.872229402261714e-06,
54130
+ "loss": 1.5666,
54131
+ "step": 8914
54132
+ },
54133
+ {
54134
+ "epoch": 71.65,
54135
+ "learning_rate": 9.872213247172861e-06,
54136
+ "loss": 1.5705,
54137
+ "step": 8915
54138
+ },
54139
+ {
54140
+ "epoch": 71.66,
54141
+ "learning_rate": 9.872197092084006e-06,
54142
+ "loss": 1.6069,
54143
+ "step": 8916
54144
+ },
54145
+ {
54146
+ "epoch": 71.67,
54147
+ "learning_rate": 9.872180936995153e-06,
54148
+ "loss": 1.5663,
54149
+ "step": 8917
54150
+ },
54151
+ {
54152
+ "epoch": 71.68,
54153
+ "learning_rate": 9.872164781906302e-06,
54154
+ "loss": 1.3925,
54155
+ "step": 8918
54156
+ },
54157
+ {
54158
+ "epoch": 71.69,
54159
+ "learning_rate": 9.872148626817449e-06,
54160
+ "loss": 1.2773,
54161
+ "step": 8919
54162
+ },
54163
+ {
54164
+ "epoch": 71.69,
54165
+ "learning_rate": 9.872132471728595e-06,
54166
+ "loss": 1.5319,
54167
+ "step": 8920
54168
+ },
54169
+ {
54170
+ "epoch": 71.7,
54171
+ "learning_rate": 9.872116316639742e-06,
54172
+ "loss": 1.4748,
54173
+ "step": 8921
54174
+ },
54175
+ {
54176
+ "epoch": 71.71,
54177
+ "learning_rate": 9.872100161550889e-06,
54178
+ "loss": 1.4809,
54179
+ "step": 8922
54180
+ },
54181
+ {
54182
+ "epoch": 71.72,
54183
+ "learning_rate": 9.872084006462036e-06,
54184
+ "loss": 1.285,
54185
+ "step": 8923
54186
+ },
54187
+ {
54188
+ "epoch": 71.73,
54189
+ "learning_rate": 9.872067851373184e-06,
54190
+ "loss": 1.3257,
54191
+ "step": 8924
54192
+ },
54193
+ {
54194
+ "epoch": 71.73,
54195
+ "learning_rate": 9.872051696284331e-06,
54196
+ "loss": 1.6072,
54197
+ "step": 8925
54198
+ },
54199
+ {
54200
+ "epoch": 71.74,
54201
+ "learning_rate": 9.872035541195476e-06,
54202
+ "loss": 1.3755,
54203
+ "step": 8926
54204
+ },
54205
+ {
54206
+ "epoch": 71.75,
54207
+ "learning_rate": 9.872019386106625e-06,
54208
+ "loss": 1.5515,
54209
+ "step": 8927
54210
+ },
54211
+ {
54212
+ "epoch": 71.76,
54213
+ "learning_rate": 9.872003231017772e-06,
54214
+ "loss": 1.8547,
54215
+ "step": 8928
54216
+ },
54217
+ {
54218
+ "epoch": 71.77,
54219
+ "learning_rate": 9.871987075928918e-06,
54220
+ "loss": 1.7236,
54221
+ "step": 8929
54222
+ },
54223
+ {
54224
+ "epoch": 71.77,
54225
+ "learning_rate": 9.871970920840065e-06,
54226
+ "loss": 1.939,
54227
+ "step": 8930
54228
+ },
54229
+ {
54230
+ "epoch": 71.78,
54231
+ "learning_rate": 9.871954765751212e-06,
54232
+ "loss": 1.864,
54233
+ "step": 8931
54234
+ },
54235
+ {
54236
+ "epoch": 71.79,
54237
+ "learning_rate": 9.871938610662359e-06,
54238
+ "loss": 1.8672,
54239
+ "step": 8932
54240
+ },
54241
+ {
54242
+ "epoch": 71.8,
54243
+ "learning_rate": 9.871922455573506e-06,
54244
+ "loss": 2.3148,
54245
+ "step": 8933
54246
+ },
54247
+ {
54248
+ "epoch": 71.81,
54249
+ "learning_rate": 9.871906300484654e-06,
54250
+ "loss": 3.0736,
54251
+ "step": 8934
54252
+ },
54253
+ {
54254
+ "epoch": 71.81,
54255
+ "learning_rate": 9.871890145395801e-06,
54256
+ "loss": 1.783,
54257
+ "step": 8935
54258
+ },
54259
+ {
54260
+ "epoch": 71.82,
54261
+ "learning_rate": 9.871873990306948e-06,
54262
+ "loss": 1.6798,
54263
+ "step": 8936
54264
+ },
54265
+ {
54266
+ "epoch": 71.83,
54267
+ "learning_rate": 9.871857835218095e-06,
54268
+ "loss": 1.6412,
54269
+ "step": 8937
54270
+ },
54271
+ {
54272
+ "epoch": 71.84,
54273
+ "learning_rate": 9.871841680129242e-06,
54274
+ "loss": 1.474,
54275
+ "step": 8938
54276
+ },
54277
+ {
54278
+ "epoch": 71.85,
54279
+ "learning_rate": 9.871825525040388e-06,
54280
+ "loss": 1.45,
54281
+ "step": 8939
54282
+ },
54283
+ {
54284
+ "epoch": 71.85,
54285
+ "learning_rate": 9.871809369951535e-06,
54286
+ "loss": 1.3193,
54287
+ "step": 8940
54288
+ },
54289
+ {
54290
+ "epoch": 71.86,
54291
+ "learning_rate": 9.871793214862682e-06,
54292
+ "loss": 1.5135,
54293
+ "step": 8941
54294
+ },
54295
+ {
54296
+ "epoch": 71.87,
54297
+ "learning_rate": 9.871777059773829e-06,
54298
+ "loss": 1.2765,
54299
+ "step": 8942
54300
+ },
54301
+ {
54302
+ "epoch": 71.88,
54303
+ "learning_rate": 9.871760904684976e-06,
54304
+ "loss": 1.4524,
54305
+ "step": 8943
54306
+ },
54307
+ {
54308
+ "epoch": 71.89,
54309
+ "learning_rate": 9.871744749596124e-06,
54310
+ "loss": 2.1673,
54311
+ "step": 8944
54312
+ },
54313
+ {
54314
+ "epoch": 71.9,
54315
+ "learning_rate": 9.871728594507271e-06,
54316
+ "loss": 1.2339,
54317
+ "step": 8945
54318
+ },
54319
+ {
54320
+ "epoch": 71.9,
54321
+ "learning_rate": 9.871712439418418e-06,
54322
+ "loss": 1.7509,
54323
+ "step": 8946
54324
+ },
54325
+ {
54326
+ "epoch": 71.91,
54327
+ "learning_rate": 9.871696284329565e-06,
54328
+ "loss": 1.7138,
54329
+ "step": 8947
54330
+ },
54331
+ {
54332
+ "epoch": 71.92,
54333
+ "learning_rate": 9.871680129240712e-06,
54334
+ "loss": 1.4647,
54335
+ "step": 8948
54336
+ },
54337
+ {
54338
+ "epoch": 71.93,
54339
+ "learning_rate": 9.871663974151858e-06,
54340
+ "loss": 1.7137,
54341
+ "step": 8949
54342
+ },
54343
+ {
54344
+ "epoch": 71.94,
54345
+ "learning_rate": 9.871647819063005e-06,
54346
+ "loss": 1.337,
54347
+ "step": 8950
54348
+ },
54349
+ {
54350
+ "epoch": 71.94,
54351
+ "learning_rate": 9.871631663974152e-06,
54352
+ "loss": 1.5527,
54353
+ "step": 8951
54354
+ },
54355
+ {
54356
+ "epoch": 71.95,
54357
+ "learning_rate": 9.871615508885299e-06,
54358
+ "loss": 1.8695,
54359
+ "step": 8952
54360
+ },
54361
+ {
54362
+ "epoch": 71.96,
54363
+ "learning_rate": 9.871599353796447e-06,
54364
+ "loss": 1.7975,
54365
+ "step": 8953
54366
+ },
54367
+ {
54368
+ "epoch": 71.97,
54369
+ "learning_rate": 9.871583198707594e-06,
54370
+ "loss": 1.6534,
54371
+ "step": 8954
54372
+ },
54373
+ {
54374
+ "epoch": 71.98,
54375
+ "learning_rate": 9.871567043618741e-06,
54376
+ "loss": 1.8484,
54377
+ "step": 8955
54378
+ },
54379
+ {
54380
+ "epoch": 71.98,
54381
+ "learning_rate": 9.871550888529888e-06,
54382
+ "loss": 2.1141,
54383
+ "step": 8956
54384
+ },
54385
+ {
54386
+ "epoch": 71.99,
54387
+ "learning_rate": 9.871534733441035e-06,
54388
+ "loss": 2.2151,
54389
+ "step": 8957
54390
+ },
54391
+ {
54392
+ "epoch": 72.0,
54393
+ "learning_rate": 9.871518578352182e-06,
54394
+ "loss": 3.2212,
54395
+ "step": 8958
54396
+ },
54397
+ {
54398
+ "epoch": 72.0,
54399
+ "eval_loss": 1.2623388767242432,
54400
+ "eval_runtime": 44.0571,
54401
+ "eval_samples_per_second": 18.953,
54402
+ "eval_steps_per_second": 0.613,
54403
+ "eval_wer": 0.6824567855829349,
54404
+ "step": 8958
54405
  }
54406
  ],
54407
  "max_steps": 620000,
54408
  "num_train_epochs": 5000,
54409
+ "total_flos": 2.51807555223276e+19,
54410
  "trial_name": null,
54411
  "trial_params": null
54412
  }
model-bin/finetune/base/{checkpoint-8212 β†’ checkpoint-8958}/training_args.bin RENAMED
File without changes
model-bin/finetune/base/{checkpoint-8336/scaler.pt β†’ log/1629481571.7415848/events.out.tfevents.1629481571.2977154bd390.32087.9} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:162281c921fbe203093bbc2adae6f3b62e85331a06e7f509a629558a6a8662b9
3
- size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67f17e14d8cf993f84d88b28619579c5abb3e0c20187e367c7928549f62e28a9
3
+ size 4194
model-bin/finetune/base/{checkpoint-8336/rng_state.pth β†’ log/events.out.tfevents.1629481571.2977154bd390.32087.8} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec13d36c08d3fcc58e321efac60e8451e506e89e6cacc44674409ea4dade1ec6
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ce1f4c04e01e6683904fb61a0dba575d640fb8538ce171444466011b00fa888
3
+ size 24078