Model save

Browse files

Files changed (9) hide show

README.md +52 -39
added_tokens.json +4 -0
config.json +28 -56
model.safetensors +2 -2
preprocessor_config.json +8 -7
special_tokens_map.json +2 -2
tokenizer_config.json +17 -19
training_args.bin +2 -2
vocab.json +31 -32

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
-license: apache-2.0
-base_model: facebook/wav2vec2-base-960h
 tags:
 - generated_from_trainer
 metrics:
@@ -15,10 +15,10 @@ should probably proofread and complete it, then remove this comment. -->
 # Helldivers2ASR_V4
-This model is a fine-tuned version of [facebook/wav2vec2-base-960h](https://huggingface.co/facebook/wav2vec2-base-960h) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 84.0411
-- Wer: 0.0353
 ## Model description
@@ -37,49 +37,62 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 8e-05
-- train_batch_size: 32
-- eval_batch_size: 32
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant
-- num_epochs: 30
 - mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Wer    |
 |:-------------:|:-----:|:----:|:---------------:|:------:|
-| 1558.3099     | 1.0   | 165  | 916.7892        | 0.4471 |
-| 1027.9072     | 2.0   | 330  | 601.7479        | 0.3119 |
-| 829.0856      | 3.0   | 495  | 517.5432        | 0.2504 |
-| 725.1977      | 4.0   | 660  | 431.1833        | 0.2064 |
-| 603.7012      | 5.0   | 825  | 377.6158        | 0.1907 |
-| 548.9523      | 6.0   | 990  | 422.9266        | 0.1910 |
-| 460.0548      | 7.0   | 1155 | 356.2549        | 0.1468 |
-| 433.2836      | 8.0   | 1320 | 304.4526        | 0.1413 |
-| 406.6106      | 9.0   | 1485 | 261.6118        | 0.1232 |
-| 369.8095      | 10.0  | 1650 | 252.4694        | 0.1141 |
-| 309.2204      | 11.0  | 1815 | 218.6043        | 0.0968 |
-| 304.9205      | 12.0  | 1980 | 207.7220        | 0.0947 |
-| 303.3279      | 13.0  | 2145 | 163.3759        | 0.0767 |
-| 232.3942      | 14.0  | 2310 | 138.1063        | 0.0631 |
-| 236.0941      | 15.0  | 2475 | 143.9604        | 0.0628 |
-| 206.3721      | 16.0  | 2640 | 178.2018        | 0.0743 |
-| 212.6076      | 17.0  | 2805 | 168.2616        | 0.0701 |
-| 196.8633      | 18.0  | 2970 | 250.8950        | 0.0913 |
-| 189.2393      | 19.0  | 3135 | 145.3700        | 0.0586 |
-| 181.7939      | 20.0  | 3300 | 142.6985        | 0.0623 |
-| 164.7163      | 21.0  | 3465 | 123.3177        | 0.0586 |
-| 163.0101      | 22.0  | 3630 | 101.7651        | 0.0440 |
-| 163.8242      | 23.0  | 3795 | 89.8875         | 0.0406 |
-| 156.9307      | 24.0  | 3960 | 125.6658        | 0.0513 |
-| 135.1071      | 25.0  | 4125 | 120.9448        | 0.0518 |
-| 129.4286      | 26.0  | 4290 | 102.5400        | 0.0413 |
-| 127.3218      | 27.0  | 4455 | 80.8292         | 0.0348 |
-| 129.6052      | 28.0  | 4620 | 83.5904         | 0.0358 |
-| 124.8196      | 29.0  | 4785 | 70.5415         | 0.0301 |
-| 100.2235      | 30.0  | 4950 | 84.0411         | 0.0353 |
 ### Framework versions

 ---
+license: mit
+base_model: facebook/w2v-bert-2.0
 tags:
 - generated_from_trainer
 metrics:
 # Helldivers2ASR_V4
+This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.0526
+- Wer: 0.2050
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 3e-05
+- train_batch_size: 16
+- eval_batch_size: 16
 - seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant
+- lr_scheduler_warmup_ratio: 0.05
+- num_epochs: 40
 - mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Wer    |
 |:-------------:|:-----:|:----:|:---------------:|:------:|
+| 5.2103        | 1.0   | 110  | 4.6387          | 0.9614 |
+| 4.0           | 2.0   | 220  | 3.7298          | 0.9614 |
+| 3.3401        | 3.0   | 330  | 3.1648          | 1.0    |
+| 3.027         | 4.0   | 440  | 2.9456          | 1.0    |
+| 2.8744        | 5.0   | 550  | 2.8111          | 0.9858 |
+| 2.7692        | 6.0   | 660  | 2.7143          | 0.9949 |
+| 2.6962        | 7.0   | 770  | 2.6292          | 0.9089 |
+| 2.6016        | 8.0   | 880  | 2.5115          | 0.9132 |
+| 2.4811        | 9.0   | 990  | 2.3195          | 0.8802 |
+| 2.2281        | 10.0  | 1100 | 1.9468          | 0.8849 |
+| 1.8929        | 11.0  | 1210 | 1.5638          | 0.8083 |
+| 1.5681        | 12.0  | 1320 | 1.2138          | 0.6911 |
+| 1.3159        | 13.0  | 1430 | 0.9585          | 0.6029 |
+| 1.1081        | 14.0  | 1540 | 0.7569          | 0.5468 |
+| 0.8903        | 15.0  | 1650 | 0.5943          | 0.4744 |
+| 0.751         | 16.0  | 1760 | 0.4671          | 0.4168 |
+| 0.6606        | 17.0  | 1870 | 0.3815          | 0.3865 |
+| 0.5773        | 18.0  | 1980 | 0.3169          | 0.3603 |
+| 0.5434        | 19.0  | 2090 | 0.2727          | 0.3353 |
+| 0.4453        | 20.0  | 2200 | 0.2387          | 0.3203 |
+| 0.4025        | 21.0  | 2310 | 0.2068          | 0.2942 |
+| 0.35          | 22.0  | 2420 | 0.1780          | 0.2871 |
+| 0.3426        | 23.0  | 2530 | 0.1695          | 0.2776 |
+| 0.3035        | 24.0  | 2640 | 0.1489          | 0.2654 |
+| 0.2355        | 25.0  | 2750 | 0.1264          | 0.2556 |
+| 0.2401        | 26.0  | 2860 | 0.1192          | 0.2420 |
+| 0.2204        | 27.0  | 2970 | 0.1079          | 0.2389 |
+| 0.2006        | 28.0  | 3080 | 0.1036          | 0.2308 |
+| 0.218         | 29.0  | 3190 | 0.0919          | 0.2296 |
+| 0.1975        | 30.0  | 3300 | 0.0880          | 0.2290 |
+| 0.1898        | 31.0  | 3410 | 0.0817          | 0.2196 |
+| 0.1778        | 32.0  | 3520 | 0.0751          | 0.2178 |
+| 0.1601        | 33.0  | 3630 | 0.0723          | 0.2109 |
+| 0.1703        | 34.0  | 3740 | 0.0722          | 0.2145 |
+| 0.1643        | 35.0  | 3850 | 0.0658          | 0.2115 |
+| 0.1479        | 36.0  | 3960 | 0.0640          | 0.2115 |
+| 0.128         | 37.0  | 4070 | 0.0639          | 0.2084 |
+| 0.1361        | 38.0  | 4180 | 0.0628          | 0.2096 |
+| 0.1164        | 39.0  | 4290 | 0.0587          | 0.2098 |
+| 0.1199        | 40.0  | 4400 | 0.0526          | 0.2050 |
 ### Framework versions

added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "</s>": 32,
+  "<s>": 31
+}

config.json CHANGED Viewed

@@ -1,86 +1,57 @@
 {
-  "_name_or_path": "facebook/wav2vec2-base-960h",
-  "activation_dropout": 0.1,
-  "adapter_attn_dim": null,
   "adapter_kernel_size": 3,
   "adapter_stride": 2,
   "add_adapter": false,
-  "apply_spec_augment": true,
   "architectures": [
-    "Wav2Vec2ForCTC"
   ],
   "attention_dropout": 0.1,
   "bos_token_id": 1,
-  "classifier_proj_size": 256,
-  "codevector_dim": 256,
   "contrastive_logits_temperature": 0.1,
-  "conv_bias": false,
-  "conv_dim": [
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512
-  ],
-  "conv_kernel": [
-    10,
-    3,
-    3,
-    3,
-    3,
-    2,
-    2
-  ],
-  "conv_stride": [
-    5,
-    2,
-    2,
-    2,
-    2,
-    2,
-    2
-  ],
-  "ctc_loss_reduction": "sum",
   "ctc_zero_infinity": false,
   "diversity_loss_weight": 0.1,
-  "do_stable_layer_norm": false,
   "eos_token_id": 2,
-  "feat_extract_activation": "gelu",
-  "feat_extract_dropout": 0.0,
-  "feat_extract_norm": "group",
   "feat_proj_dropout": 0.1,
   "feat_quantizer_dropout": 0.0,
   "final_dropout": 0.1,
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
   "hidden_dropout": 0.1,
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
   "layer_norm_eps": 1e-05,
   "layerdrop": 0.1,
   "mask_feature_length": 10,
   "mask_feature_min_masks": 0,
   "mask_feature_prob": 0.0,
   "mask_time_length": 10,
   "mask_time_min_masks": 2,
   "mask_time_prob": 0.05,
-  "model_type": "wav2vec2",
-  "num_adapter_layers": 3,
-  "num_attention_heads": 12,
   "num_codevector_groups": 2,
   "num_codevectors_per_group": 320,
-  "num_conv_pos_embedding_groups": 16,
-  "num_conv_pos_embeddings": 128,
-  "num_feat_extract_layers": 7,
-  "num_hidden_layers": 12,
   "num_negatives": 100,
-  "output_hidden_size": 768,
-  "pad_token_id": 0,
-  "proj_codevector_dim": 256,
   "tdnn_dilation": [
     1,
     2,
@@ -104,7 +75,8 @@
   ],
   "torch_dtype": "float32",
   "transformers_version": "4.44.0",
   "use_weighted_layer_sum": false,
-  "vocab_size": 32,
   "xvector_output_dim": 512
 }

 {
+  "_name_or_path": "facebook/w2v-bert-2.0",
+  "activation_dropout": 0.0,
+  "adapter_act": "relu",
   "adapter_kernel_size": 3,
   "adapter_stride": 2,
   "add_adapter": false,
+  "apply_spec_augment": false,
   "architectures": [
+    "Wav2Vec2BertForCTC"
   ],
   "attention_dropout": 0.1,
   "bos_token_id": 1,
+  "classifier_proj_size": 768,
+  "codevector_dim": 768,
+  "conformer_conv_dropout": 0.1,
   "contrastive_logits_temperature": 0.1,
+  "conv_depthwise_kernel_size": 31,
+  "ctc_loss_reduction": "mean",
   "ctc_zero_infinity": false,
   "diversity_loss_weight": 0.1,
   "eos_token_id": 2,
   "feat_proj_dropout": 0.1,
   "feat_quantizer_dropout": 0.0,
+  "feature_projection_input_dim": 160,
   "final_dropout": 0.1,
+  "hidden_act": "swish",
   "hidden_dropout": 0.1,
+  "hidden_size": 1024,
   "initializer_range": 0.02,
+  "intermediate_size": 4096,
   "layer_norm_eps": 1e-05,
   "layerdrop": 0.1,
+  "left_max_position_embeddings": 64,
   "mask_feature_length": 10,
   "mask_feature_min_masks": 0,
   "mask_feature_prob": 0.0,
   "mask_time_length": 10,
   "mask_time_min_masks": 2,
   "mask_time_prob": 0.05,
+  "max_source_positions": 5000,
+  "model_type": "wav2vec2-bert",
+  "num_adapter_layers": 1,
+  "num_attention_heads": 16,
   "num_codevector_groups": 2,
   "num_codevectors_per_group": 320,
+  "num_hidden_layers": 24,
   "num_negatives": 100,
+  "output_hidden_size": 1024,
+  "pad_token_id": 29,
+  "position_embeddings_type": "relative_key",
+  "proj_codevector_dim": 768,
+  "right_max_position_embeddings": 8,
+  "rotary_embedding_base": 10000,
   "tdnn_dilation": [
     1,
     2,
   ],
   "torch_dtype": "float32",
   "transformers_version": "4.44.0",
+  "use_intermediate_ffn_before_adapter": false,
   "use_weighted_layer_sum": false,
+  "vocab_size": 33,
   "xvector_output_dim": 512
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d5f37acec2356d6b94ae04c69f9109c321de1c82067caf0ddbc5e671ec18de3
-size 377611120

 version https://git-lfs.github.com/spec/v1
+oid sha256:61236cfe0863ff88bccb30efb7a3a67974814943c8cd9b8f02458256adb3c011
+size 2322210012

preprocessor_config.json CHANGED Viewed

@@ -1,10 +1,11 @@
 {
-  "do_normalize": true,
-  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
-  "feature_size": 1,
   "padding_side": "right",
-  "padding_value": 0.0,
-  "processor_class": "Wav2Vec2Processor",
-  "return_attention_mask": false,
-  "sampling_rate": 16000
 }

 {
+  "feature_extractor_type": "SeamlessM4TFeatureExtractor",
+  "feature_size": 80,
+  "num_mel_bins": 80,
   "padding_side": "right",
+  "padding_value": 1,
+  "processor_class": "Wav2Vec2BertProcessor",
+  "return_attention_mask": true,
+  "sampling_rate": 16000,
+  "stride": 2
 }

special_tokens_map.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "bos_token": "<s>",
   "eos_token": "</s>",
-  "pad_token": "<pad>",
-  "unk_token": "<unk>"
 }

 {
   "bos_token": "<s>",
   "eos_token": "</s>",
+  "pad_token": "[PAD]",
+  "unk_token": "[UNK]"
 }

tokenizer_config.json CHANGED Viewed

@@ -1,50 +1,48 @@
 {
   "added_tokens_decoder": {
-    "0": {
-      "content": "<pad>",
       "lstrip": true,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
       "special": false
     },
-    "1": {
-      "content": "<s>",
       "lstrip": true,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
       "special": false
     },
-    "2": {
-      "content": "</s>",
-      "lstrip": true,
       "normalized": false,
-      "rstrip": true,
       "single_word": false,
-      "special": false
     },
-    "3": {
-      "content": "<unk>",
-      "lstrip": true,
       "normalized": false,
-      "rstrip": true,
       "single_word": false,
-      "special": false
     }
   },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
   "do_lower_case": false,
-  "do_normalize": true,
   "eos_token": "</s>",
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<pad>",
-  "processor_class": "Wav2Vec2Processor",
   "replace_word_delimiter_char": " ",
-  "return_attention_mask": false,
   "target_lang": null,
   "tokenizer_class": "Wav2Vec2CTCTokenizer",
-  "unk_token": "<unk>",
   "word_delimiter_token": "|"
 }

 {
   "added_tokens_decoder": {
+    "28": {
+      "content": "[UNK]",
       "lstrip": true,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
       "special": false
     },
+    "29": {
+      "content": "[PAD]",
       "lstrip": true,
       "normalized": false,
       "rstrip": true,
       "single_word": false,
       "special": false
     },
+    "31": {
+      "content": "<s>",
+      "lstrip": false,
       "normalized": false,
+      "rstrip": false,
       "single_word": false,
+      "special": true
     },
+    "32": {
+      "content": "</s>",
+      "lstrip": false,
       "normalized": false,
+      "rstrip": false,
       "single_word": false,
+      "special": true
     }
   },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
   "do_lower_case": false,
   "eos_token": "</s>",
   "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "processor_class": "Wav2Vec2BertProcessor",
   "replace_word_delimiter_char": " ",
   "target_lang": null,
   "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "unk_token": "[UNK]",
   "word_delimiter_token": "|"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2a158f477aecab806aea37b3711400636302883c98410dd7d0cb26e56dcefc0
-size 5176

 version https://git-lfs.github.com/spec/v1
+oid sha256:2a66ed745ef4ab29185f94d62922b5a23cd8407d3df54414e086e2f41ad18915
+size 5240

vocab.json CHANGED Viewed

@@ -1,34 +1,33 @@
 {
-  "'": 27,
-  "</s>": 2,
-  "<pad>": 0,
-  "<s>": 1,
-  "<unk>": 3,
-  "A": 7,
-  "B": 24,
-  "C": 19,
-  "D": 14,
-  "E": 5,
-  "F": 20,
-  "G": 21,
-  "H": 11,
-  "I": 10,
-  "J": 29,
-  "K": 26,
-  "L": 15,
-  "M": 17,
-  "N": 9,
-  "O": 8,
-  "P": 23,
-  "Q": 30,
-  "R": 13,
-  "S": 12,
-  "T": 6,
-  "U": 16,
-  "V": 25,
-  "W": 18,
-  "X": 28,
-  "Y": 22,
-  "Z": 31,
-  "|": 4
 }

 {
+  " ": 27,
+  "'": 26,
+  "[PAD]": 29,
+  "[UNK]": 28,
+  "a": 0,
+  "b": 1,
+  "c": 2,
+  "d": 3,
+  "e": 4,
+  "f": 5,
+  "g": 6,
+  "h": 7,
+  "i": 8,
+  "j": 9,
+  "k": 10,
+  "l": 11,
+  "m": 12,
+  "n": 13,
+  "o": 14,
+  "p": 15,
+  "q": 16,
+  "r": 17,
+  "s": 18,
+  "t": 19,
+  "u": 20,
+  "v": 21,
+  "w": 22,
+  "x": 23,
+  "y": 24,
+  "z": 25,
+  "|": 30
 }