8688chris committed on
Commit
384c363
·
verified ·
1 Parent(s): 0dc49c9

Model save

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- license: apache-2.0
3
- base_model: facebook/wav2vec2-base-960h
4
  tags:
5
  - generated_from_trainer
6
  metrics:
@@ -15,10 +15,10 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # Helldivers2ASR_V4
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-base-960h](https://huggingface.co/facebook/wav2vec2-base-960h) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 84.0411
21
- - Wer: 0.0353
22
 
23
  ## Model description
24
 
@@ -37,49 +37,62 @@ More information needed
37
  ### Training hyperparameters
38
 
39
  The following hyperparameters were used during training:
40
- - learning_rate: 8e-05
41
- - train_batch_size: 32
42
- - eval_batch_size: 32
43
  - seed: 42
 
 
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: constant
46
- - num_epochs: 30
 
47
  - mixed_precision_training: Native AMP
48
 
49
  ### Training results
50
 
51
  | Training Loss | Epoch | Step | Validation Loss | Wer |
52
  |:-------------:|:-----:|:----:|:---------------:|:------:|
53
- | 1558.3099 | 1.0 | 165 | 916.7892 | 0.4471 |
54
- | 1027.9072 | 2.0 | 330 | 601.7479 | 0.3119 |
55
- | 829.0856 | 3.0 | 495 | 517.5432 | 0.2504 |
56
- | 725.1977 | 4.0 | 660 | 431.1833 | 0.2064 |
57
- | 603.7012 | 5.0 | 825 | 377.6158 | 0.1907 |
58
- | 548.9523 | 6.0 | 990 | 422.9266 | 0.1910 |
59
- | 460.0548 | 7.0 | 1155 | 356.2549 | 0.1468 |
60
- | 433.2836 | 8.0 | 1320 | 304.4526 | 0.1413 |
61
- | 406.6106 | 9.0 | 1485 | 261.6118 | 0.1232 |
62
- | 369.8095 | 10.0 | 1650 | 252.4694 | 0.1141 |
63
- | 309.2204 | 11.0 | 1815 | 218.6043 | 0.0968 |
64
- | 304.9205 | 12.0 | 1980 | 207.7220 | 0.0947 |
65
- | 303.3279 | 13.0 | 2145 | 163.3759 | 0.0767 |
66
- | 232.3942 | 14.0 | 2310 | 138.1063 | 0.0631 |
67
- | 236.0941 | 15.0 | 2475 | 143.9604 | 0.0628 |
68
- | 206.3721 | 16.0 | 2640 | 178.2018 | 0.0743 |
69
- | 212.6076 | 17.0 | 2805 | 168.2616 | 0.0701 |
70
- | 196.8633 | 18.0 | 2970 | 250.8950 | 0.0913 |
71
- | 189.2393 | 19.0 | 3135 | 145.3700 | 0.0586 |
72
- | 181.7939 | 20.0 | 3300 | 142.6985 | 0.0623 |
73
- | 164.7163 | 21.0 | 3465 | 123.3177 | 0.0586 |
74
- | 163.0101 | 22.0 | 3630 | 101.7651 | 0.0440 |
75
- | 163.8242 | 23.0 | 3795 | 89.8875 | 0.0406 |
76
- | 156.9307 | 24.0 | 3960 | 125.6658 | 0.0513 |
77
- | 135.1071 | 25.0 | 4125 | 120.9448 | 0.0518 |
78
- | 129.4286 | 26.0 | 4290 | 102.5400 | 0.0413 |
79
- | 127.3218 | 27.0 | 4455 | 80.8292 | 0.0348 |
80
- | 129.6052 | 28.0 | 4620 | 83.5904 | 0.0358 |
81
- | 124.8196 | 29.0 | 4785 | 70.5415 | 0.0301 |
82
- | 100.2235 | 30.0 | 4950 | 84.0411 | 0.0353 |
 
 
 
 
 
 
 
 
 
 
83
 
84
 
85
  ### Framework versions
 
1
  ---
2
+ license: mit
3
+ base_model: facebook/w2v-bert-2.0
4
  tags:
5
  - generated_from_trainer
6
  metrics:
 
15
 
16
  # Helldivers2ASR_V4
17
 
18
+ This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.0526
21
+ - Wer: 0.2050
22
 
23
  ## Model description
24
 
 
37
  ### Training hyperparameters
38
 
39
  The following hyperparameters were used during training:
40
+ - learning_rate: 3e-05
41
+ - train_batch_size: 16
42
+ - eval_batch_size: 16
43
  - seed: 42
44
+ - gradient_accumulation_steps: 4
45
+ - total_train_batch_size: 64
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: constant
48
+ - lr_scheduler_warmup_ratio: 0.05
49
+ - num_epochs: 40
50
  - mixed_precision_training: Native AMP
51
 
52
  ### Training results
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Wer |
55
  |:-------------:|:-----:|:----:|:---------------:|:------:|
56
+ | 5.2103 | 1.0 | 110 | 4.6387 | 0.9614 |
57
+ | 4.0 | 2.0 | 220 | 3.7298 | 0.9614 |
58
+ | 3.3401 | 3.0 | 330 | 3.1648 | 1.0 |
59
+ | 3.027 | 4.0 | 440 | 2.9456 | 1.0 |
60
+ | 2.8744 | 5.0 | 550 | 2.8111 | 0.9858 |
61
+ | 2.7692 | 6.0 | 660 | 2.7143 | 0.9949 |
62
+ | 2.6962 | 7.0 | 770 | 2.6292 | 0.9089 |
63
+ | 2.6016 | 8.0 | 880 | 2.5115 | 0.9132 |
64
+ | 2.4811 | 9.0 | 990 | 2.3195 | 0.8802 |
65
+ | 2.2281 | 10.0 | 1100 | 1.9468 | 0.8849 |
66
+ | 1.8929 | 11.0 | 1210 | 1.5638 | 0.8083 |
67
+ | 1.5681 | 12.0 | 1320 | 1.2138 | 0.6911 |
68
+ | 1.3159 | 13.0 | 1430 | 0.9585 | 0.6029 |
69
+ | 1.1081 | 14.0 | 1540 | 0.7569 | 0.5468 |
70
+ | 0.8903 | 15.0 | 1650 | 0.5943 | 0.4744 |
71
+ | 0.751 | 16.0 | 1760 | 0.4671 | 0.4168 |
72
+ | 0.6606 | 17.0 | 1870 | 0.3815 | 0.3865 |
73
+ | 0.5773 | 18.0 | 1980 | 0.3169 | 0.3603 |
74
+ | 0.5434 | 19.0 | 2090 | 0.2727 | 0.3353 |
75
+ | 0.4453 | 20.0 | 2200 | 0.2387 | 0.3203 |
76
+ | 0.4025 | 21.0 | 2310 | 0.2068 | 0.2942 |
77
+ | 0.35 | 22.0 | 2420 | 0.1780 | 0.2871 |
78
+ | 0.3426 | 23.0 | 2530 | 0.1695 | 0.2776 |
79
+ | 0.3035 | 24.0 | 2640 | 0.1489 | 0.2654 |
80
+ | 0.2355 | 25.0 | 2750 | 0.1264 | 0.2556 |
81
+ | 0.2401 | 26.0 | 2860 | 0.1192 | 0.2420 |
82
+ | 0.2204 | 27.0 | 2970 | 0.1079 | 0.2389 |
83
+ | 0.2006 | 28.0 | 3080 | 0.1036 | 0.2308 |
84
+ | 0.218 | 29.0 | 3190 | 0.0919 | 0.2296 |
85
+ | 0.1975 | 30.0 | 3300 | 0.0880 | 0.2290 |
86
+ | 0.1898 | 31.0 | 3410 | 0.0817 | 0.2196 |
87
+ | 0.1778 | 32.0 | 3520 | 0.0751 | 0.2178 |
88
+ | 0.1601 | 33.0 | 3630 | 0.0723 | 0.2109 |
89
+ | 0.1703 | 34.0 | 3740 | 0.0722 | 0.2145 |
90
+ | 0.1643 | 35.0 | 3850 | 0.0658 | 0.2115 |
91
+ | 0.1479 | 36.0 | 3960 | 0.0640 | 0.2115 |
92
+ | 0.128 | 37.0 | 4070 | 0.0639 | 0.2084 |
93
+ | 0.1361 | 38.0 | 4180 | 0.0628 | 0.2096 |
94
+ | 0.1164 | 39.0 | 4290 | 0.0587 | 0.2098 |
95
+ | 0.1199 | 40.0 | 4400 | 0.0526 | 0.2050 |
96
 
97
 
98
  ### Framework versions
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</s>": 32,
3
+ "<s>": 31
4
+ }
config.json CHANGED
@@ -1,86 +1,57 @@
1
  {
2
- "_name_or_path": "facebook/wav2vec2-base-960h",
3
- "activation_dropout": 0.1,
4
- "adapter_attn_dim": null,
5
  "adapter_kernel_size": 3,
6
  "adapter_stride": 2,
7
  "add_adapter": false,
8
- "apply_spec_augment": true,
9
  "architectures": [
10
- "Wav2Vec2ForCTC"
11
  ],
12
  "attention_dropout": 0.1,
13
  "bos_token_id": 1,
14
- "classifier_proj_size": 256,
15
- "codevector_dim": 256,
 
16
  "contrastive_logits_temperature": 0.1,
17
- "conv_bias": false,
18
- "conv_dim": [
19
- 512,
20
- 512,
21
- 512,
22
- 512,
23
- 512,
24
- 512,
25
- 512
26
- ],
27
- "conv_kernel": [
28
- 10,
29
- 3,
30
- 3,
31
- 3,
32
- 3,
33
- 2,
34
- 2
35
- ],
36
- "conv_stride": [
37
- 5,
38
- 2,
39
- 2,
40
- 2,
41
- 2,
42
- 2,
43
- 2
44
- ],
45
- "ctc_loss_reduction": "sum",
46
  "ctc_zero_infinity": false,
47
  "diversity_loss_weight": 0.1,
48
- "do_stable_layer_norm": false,
49
  "eos_token_id": 2,
50
- "feat_extract_activation": "gelu",
51
- "feat_extract_dropout": 0.0,
52
- "feat_extract_norm": "group",
53
  "feat_proj_dropout": 0.1,
54
  "feat_quantizer_dropout": 0.0,
 
55
  "final_dropout": 0.1,
56
- "gradient_checkpointing": false,
57
- "hidden_act": "gelu",
58
  "hidden_dropout": 0.1,
59
- "hidden_dropout_prob": 0.1,
60
- "hidden_size": 768,
61
  "initializer_range": 0.02,
62
- "intermediate_size": 3072,
63
  "layer_norm_eps": 1e-05,
64
  "layerdrop": 0.1,
 
65
  "mask_feature_length": 10,
66
  "mask_feature_min_masks": 0,
67
  "mask_feature_prob": 0.0,
68
  "mask_time_length": 10,
69
  "mask_time_min_masks": 2,
70
  "mask_time_prob": 0.05,
71
- "model_type": "wav2vec2",
72
- "num_adapter_layers": 3,
73
- "num_attention_heads": 12,
 
74
  "num_codevector_groups": 2,
75
  "num_codevectors_per_group": 320,
76
- "num_conv_pos_embedding_groups": 16,
77
- "num_conv_pos_embeddings": 128,
78
- "num_feat_extract_layers": 7,
79
- "num_hidden_layers": 12,
80
  "num_negatives": 100,
81
- "output_hidden_size": 768,
82
- "pad_token_id": 0,
83
- "proj_codevector_dim": 256,
 
 
 
84
  "tdnn_dilation": [
85
  1,
86
  2,
@@ -104,7 +75,8 @@
104
  ],
105
  "torch_dtype": "float32",
106
  "transformers_version": "4.44.0",
 
107
  "use_weighted_layer_sum": false,
108
- "vocab_size": 32,
109
  "xvector_output_dim": 512
110
  }
 
1
  {
2
+ "_name_or_path": "facebook/w2v-bert-2.0",
3
+ "activation_dropout": 0.0,
4
+ "adapter_act": "relu",
5
  "adapter_kernel_size": 3,
6
  "adapter_stride": 2,
7
  "add_adapter": false,
8
+ "apply_spec_augment": false,
9
  "architectures": [
10
+ "Wav2Vec2BertForCTC"
11
  ],
12
  "attention_dropout": 0.1,
13
  "bos_token_id": 1,
14
+ "classifier_proj_size": 768,
15
+ "codevector_dim": 768,
16
+ "conformer_conv_dropout": 0.1,
17
  "contrastive_logits_temperature": 0.1,
18
+ "conv_depthwise_kernel_size": 31,
19
+ "ctc_loss_reduction": "mean",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  "ctc_zero_infinity": false,
21
  "diversity_loss_weight": 0.1,
 
22
  "eos_token_id": 2,
 
 
 
23
  "feat_proj_dropout": 0.1,
24
  "feat_quantizer_dropout": 0.0,
25
+ "feature_projection_input_dim": 160,
26
  "final_dropout": 0.1,
27
+ "hidden_act": "swish",
 
28
  "hidden_dropout": 0.1,
29
+ "hidden_size": 1024,
 
30
  "initializer_range": 0.02,
31
+ "intermediate_size": 4096,
32
  "layer_norm_eps": 1e-05,
33
  "layerdrop": 0.1,
34
+ "left_max_position_embeddings": 64,
35
  "mask_feature_length": 10,
36
  "mask_feature_min_masks": 0,
37
  "mask_feature_prob": 0.0,
38
  "mask_time_length": 10,
39
  "mask_time_min_masks": 2,
40
  "mask_time_prob": 0.05,
41
+ "max_source_positions": 5000,
42
+ "model_type": "wav2vec2-bert",
43
+ "num_adapter_layers": 1,
44
+ "num_attention_heads": 16,
45
  "num_codevector_groups": 2,
46
  "num_codevectors_per_group": 320,
47
+ "num_hidden_layers": 24,
 
 
 
48
  "num_negatives": 100,
49
+ "output_hidden_size": 1024,
50
+ "pad_token_id": 29,
51
+ "position_embeddings_type": "relative_key",
52
+ "proj_codevector_dim": 768,
53
+ "right_max_position_embeddings": 8,
54
+ "rotary_embedding_base": 10000,
55
  "tdnn_dilation": [
56
  1,
57
  2,
 
75
  ],
76
  "torch_dtype": "float32",
77
  "transformers_version": "4.44.0",
78
+ "use_intermediate_ffn_before_adapter": false,
79
  "use_weighted_layer_sum": false,
80
+ "vocab_size": 33,
81
  "xvector_output_dim": 512
82
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d5f37acec2356d6b94ae04c69f9109c321de1c82067caf0ddbc5e671ec18de3
3
- size 377611120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61236cfe0863ff88bccb30efb7a3a67974814943c8cd9b8f02458256adb3c011
3
+ size 2322210012
preprocessor_config.json CHANGED
@@ -1,10 +1,11 @@
1
  {
2
- "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
- "feature_size": 1,
5
  "padding_side": "right",
6
- "padding_value": 0.0,
7
- "processor_class": "Wav2Vec2Processor",
8
- "return_attention_mask": false,
9
- "sampling_rate": 16000
 
10
  }
 
1
  {
2
+ "feature_extractor_type": "SeamlessM4TFeatureExtractor",
3
+ "feature_size": 80,
4
+ "num_mel_bins": 80,
5
  "padding_side": "right",
6
+ "padding_value": 1,
7
+ "processor_class": "Wav2Vec2BertProcessor",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000,
10
+ "stride": 2
11
  }
special_tokens_map.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
- "pad_token": "<pad>",
5
- "unk_token": "<unk>"
6
  }
 
1
  {
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
  }
tokenizer_config.json CHANGED
@@ -1,50 +1,48 @@
1
  {
2
  "added_tokens_decoder": {
3
- "0": {
4
- "content": "<pad>",
5
  "lstrip": true,
6
  "normalized": false,
7
  "rstrip": true,
8
  "single_word": false,
9
  "special": false
10
  },
11
- "1": {
12
- "content": "<s>",
13
  "lstrip": true,
14
  "normalized": false,
15
  "rstrip": true,
16
  "single_word": false,
17
  "special": false
18
  },
19
- "2": {
20
- "content": "</s>",
21
- "lstrip": true,
22
  "normalized": false,
23
- "rstrip": true,
24
  "single_word": false,
25
- "special": false
26
  },
27
- "3": {
28
- "content": "<unk>",
29
- "lstrip": true,
30
  "normalized": false,
31
- "rstrip": true,
32
  "single_word": false,
33
- "special": false
34
  }
35
  },
36
  "bos_token": "<s>",
37
  "clean_up_tokenization_spaces": true,
38
  "do_lower_case": false,
39
- "do_normalize": true,
40
  "eos_token": "</s>",
41
  "model_max_length": 1000000000000000019884624838656,
42
- "pad_token": "<pad>",
43
- "processor_class": "Wav2Vec2Processor",
44
  "replace_word_delimiter_char": " ",
45
- "return_attention_mask": false,
46
  "target_lang": null,
47
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
48
- "unk_token": "<unk>",
49
  "word_delimiter_token": "|"
50
  }
 
1
  {
2
  "added_tokens_decoder": {
3
+ "28": {
4
+ "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
7
  "rstrip": true,
8
  "single_word": false,
9
  "special": false
10
  },
11
+ "29": {
12
+ "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
15
  "rstrip": true,
16
  "single_word": false,
17
  "special": false
18
  },
19
+ "31": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
  "normalized": false,
23
+ "rstrip": false,
24
  "single_word": false,
25
+ "special": true
26
  },
27
+ "32": {
28
+ "content": "</s>",
29
+ "lstrip": false,
30
  "normalized": false,
31
+ "rstrip": false,
32
  "single_word": false,
33
+ "special": true
34
  }
35
  },
36
  "bos_token": "<s>",
37
  "clean_up_tokenization_spaces": true,
38
  "do_lower_case": false,
 
39
  "eos_token": "</s>",
40
  "model_max_length": 1000000000000000019884624838656,
41
+ "pad_token": "[PAD]",
42
+ "processor_class": "Wav2Vec2BertProcessor",
43
  "replace_word_delimiter_char": " ",
 
44
  "target_lang": null,
45
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
46
+ "unk_token": "[UNK]",
47
  "word_delimiter_token": "|"
48
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2a158f477aecab806aea37b3711400636302883c98410dd7d0cb26e56dcefc0
3
- size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a66ed745ef4ab29185f94d62922b5a23cd8407d3df54414e086e2f41ad18915
3
+ size 5240
vocab.json CHANGED
@@ -1,34 +1,33 @@
1
  {
2
- "'": 27,
3
- "</s>": 2,
4
- "<pad>": 0,
5
- "<s>": 1,
6
- "<unk>": 3,
7
- "A": 7,
8
- "B": 24,
9
- "C": 19,
10
- "D": 14,
11
- "E": 5,
12
- "F": 20,
13
- "G": 21,
14
- "H": 11,
15
- "I": 10,
16
- "J": 29,
17
- "K": 26,
18
- "L": 15,
19
- "M": 17,
20
- "N": 9,
21
- "O": 8,
22
- "P": 23,
23
- "Q": 30,
24
- "R": 13,
25
- "S": 12,
26
- "T": 6,
27
- "U": 16,
28
- "V": 25,
29
- "W": 18,
30
- "X": 28,
31
- "Y": 22,
32
- "Z": 31,
33
- "|": 4
34
  }
 
1
  {
2
+ " ": 27,
3
+ "'": 26,
4
+ "[PAD]": 29,
5
+ "[UNK]": 28,
6
+ "a": 0,
7
+ "b": 1,
8
+ "c": 2,
9
+ "d": 3,
10
+ "e": 4,
11
+ "f": 5,
12
+ "g": 6,
13
+ "h": 7,
14
+ "i": 8,
15
+ "j": 9,
16
+ "k": 10,
17
+ "l": 11,
18
+ "m": 12,
19
+ "n": 13,
20
+ "o": 14,
21
+ "p": 15,
22
+ "q": 16,
23
+ "r": 17,
24
+ "s": 18,
25
+ "t": 19,
26
+ "u": 20,
27
+ "v": 21,
28
+ "w": 22,
29
+ "x": 23,
30
+ "y": 24,
31
+ "z": 25,
32
+ "|": 30
 
33
  }