Paywinful committed on
Commit
a2c1940
·
verified ·
1 Parent(s): 94e3052

End of training

Browse files
README.md ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: mit
4
+ base_model: facebook/w2v-bert-2.0
5
+ tags:
6
+ - generated_from_trainer
7
+ model-index:
8
+ - name: output
9
+ results: []
10
+ ---
11
+
12
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
+ should probably proofread and complete it, then remove this comment. -->
14
+
15
+ # output
16
+
17
+ This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on the None dataset.
18
+
19
+ ## Model description
20
+
21
+ More information needed
22
+
23
+ ## Intended uses & limitations
24
+
25
+ More information needed
26
+
27
+ ## Training and evaluation data
28
+
29
+ More information needed
30
+
31
+ ## Training procedure
32
+
33
+ ### Training hyperparameters
34
+
35
+ The following hyperparameters were used during training:
36
+ - learning_rate: 3e-05
37
+ - train_batch_size: 8
38
+ - eval_batch_size: 8
39
+ - seed: 42
40
+ - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
41
+ - lr_scheduler_type: cosine
42
+ - lr_scheduler_warmup_steps: 2500
43
+ - training_steps: 25000
44
+ - mixed_precision_training: Native AMP
45
+
46
+ ### Framework versions
47
+
48
+ - Transformers 4.53.2
49
+ - Pytorch 2.6.0+cu124
50
+ - Datasets 2.14.4
51
+ - Tokenizers 0.21.2
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 42,
3
- "<s>": 41
4
  }
 
1
  {
2
+ "</s>": 67,
3
+ "<s>": 66
4
  }
config.json CHANGED
@@ -1,85 +1,56 @@
1
  {
2
- "_name_or_path": "facebook/wav2vec2-xls-r-300m",
3
  "activation_dropout": 0.0,
4
- "adapter_attn_dim": null,
5
  "adapter_kernel_size": 3,
6
  "adapter_stride": 2,
7
- "add_adapter": false,
8
- "apply_spec_augment": true,
9
  "architectures": [
10
- "Wav2Vec2ForCTC"
11
  ],
12
  "attention_dropout": 0.1,
13
  "bos_token_id": 1,
14
- "classifier_proj_size": 256,
15
  "codevector_dim": 768,
 
16
  "contrastive_logits_temperature": 0.1,
17
- "conv_bias": true,
18
- "conv_dim": [
19
- 512,
20
- 512,
21
- 512,
22
- 512,
23
- 512,
24
- 512,
25
- 512
26
- ],
27
- "conv_kernel": [
28
- 10,
29
- 3,
30
- 3,
31
- 3,
32
- 3,
33
- 2,
34
- 2
35
- ],
36
- "conv_stride": [
37
- 5,
38
- 2,
39
- 2,
40
- 2,
41
- 2,
42
- 2,
43
- 2
44
- ],
45
  "ctc_loss_reduction": "mean",
46
  "ctc_zero_infinity": false,
47
  "diversity_loss_weight": 0.1,
48
- "do_stable_layer_norm": true,
49
  "eos_token_id": 2,
50
- "feat_extract_activation": "gelu",
51
- "feat_extract_dropout": 0.0,
52
- "feat_extract_norm": "layer",
53
  "feat_proj_dropout": 0.1,
54
  "feat_quantizer_dropout": 0.0,
55
- "final_dropout": 0.0,
56
- "gradient_checkpointing": false,
57
- "hidden_act": "gelu",
58
  "hidden_dropout": 0.1,
59
  "hidden_size": 1024,
60
  "initializer_range": 0.02,
61
  "intermediate_size": 4096,
62
  "layer_norm_eps": 1e-05,
63
  "layerdrop": 0.0,
 
64
  "mask_feature_length": 10,
65
  "mask_feature_min_masks": 0,
66
  "mask_feature_prob": 0.0,
67
  "mask_time_length": 10,
68
  "mask_time_min_masks": 2,
69
  "mask_time_prob": 0.05,
70
- "model_type": "wav2vec2",
71
- "num_adapter_layers": 3,
 
72
  "num_attention_heads": 16,
73
  "num_codevector_groups": 2,
74
  "num_codevectors_per_group": 320,
75
- "num_conv_pos_embedding_groups": 16,
76
- "num_conv_pos_embeddings": 128,
77
- "num_feat_extract_layers": 7,
78
  "num_hidden_layers": 24,
79
  "num_negatives": 100,
80
  "output_hidden_size": 1024,
81
- "pad_token_id": 40,
 
82
  "proj_codevector_dim": 768,
 
 
83
  "tdnn_dilation": [
84
  1,
85
  2,
@@ -102,8 +73,9 @@
102
  1
103
  ],
104
  "torch_dtype": "float32",
105
- "transformers_version": "4.49.0",
 
106
  "use_weighted_layer_sum": false,
107
- "vocab_size": 43,
108
  "xvector_output_dim": 512
109
  }
 
1
  {
 
2
  "activation_dropout": 0.0,
3
+ "adapter_act": "relu",
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
6
+ "add_adapter": true,
7
+ "apply_spec_augment": false,
8
  "architectures": [
9
+ "Wav2Vec2BertForCTC"
10
  ],
11
  "attention_dropout": 0.1,
12
  "bos_token_id": 1,
13
+ "classifier_proj_size": 768,
14
  "codevector_dim": 768,
15
+ "conformer_conv_dropout": 0.1,
16
  "contrastive_logits_temperature": 0.1,
17
+ "conv_depthwise_kernel_size": 31,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  "ctc_loss_reduction": "mean",
19
  "ctc_zero_infinity": false,
20
  "diversity_loss_weight": 0.1,
 
21
  "eos_token_id": 2,
 
 
 
22
  "feat_proj_dropout": 0.1,
23
  "feat_quantizer_dropout": 0.0,
24
+ "feature_projection_input_dim": 160,
25
+ "final_dropout": 0.1,
26
+ "hidden_act": "swish",
27
  "hidden_dropout": 0.1,
28
  "hidden_size": 1024,
29
  "initializer_range": 0.02,
30
  "intermediate_size": 4096,
31
  "layer_norm_eps": 1e-05,
32
  "layerdrop": 0.0,
33
+ "left_max_position_embeddings": 64,
34
  "mask_feature_length": 10,
35
  "mask_feature_min_masks": 0,
36
  "mask_feature_prob": 0.0,
37
  "mask_time_length": 10,
38
  "mask_time_min_masks": 2,
39
  "mask_time_prob": 0.05,
40
+ "max_source_positions": 5000,
41
+ "model_type": "wav2vec2-bert",
42
+ "num_adapter_layers": 1,
43
  "num_attention_heads": 16,
44
  "num_codevector_groups": 2,
45
  "num_codevectors_per_group": 320,
 
 
 
46
  "num_hidden_layers": 24,
47
  "num_negatives": 100,
48
  "output_hidden_size": 1024,
49
+ "pad_token_id": 65,
50
+ "position_embeddings_type": "relative_key",
51
  "proj_codevector_dim": 768,
52
+ "right_max_position_embeddings": 8,
53
+ "rotary_embedding_base": 10000,
54
  "tdnn_dilation": [
55
  1,
56
  2,
 
73
  1
74
  ],
75
  "torch_dtype": "float32",
76
+ "transformers_version": "4.53.2",
77
+ "use_intermediate_ffn_before_adapter": false,
78
  "use_weighted_layer_sum": false,
79
+ "vocab_size": 68,
80
  "xvector_output_dim": 512
81
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cd7bb5fa971ca516e73a5d89bd019bf74400d9a69bf980a8569235df774ff86
3
- size 1261983780
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2031f2b1d22ccd285550da43b09e166eb1c682bcb1ab58a150d8289dda7c82e
3
+ size 2423097560
preprocessor_config.json CHANGED
@@ -1,10 +1,11 @@
1
  {
2
- "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
- "feature_size": 1,
5
  "padding_side": "right",
6
- "padding_value": 0.0,
7
- "processor_class": "Wav2Vec2Processor",
8
  "return_attention_mask": true,
9
- "sampling_rate": 16000
 
10
  }
 
1
  {
2
+ "feature_extractor_type": "SeamlessM4TFeatureExtractor",
3
+ "feature_size": 80,
4
+ "num_mel_bins": 80,
5
  "padding_side": "right",
6
+ "padding_value": 1,
7
+ "processor_class": "Wav2Vec2BertProcessor",
8
  "return_attention_mask": true,
9
+ "sampling_rate": 16000,
10
+ "stride": 2
11
  }
special_tokens_map.json CHANGED
@@ -1,6 +1,30 @@
1
  {
2
- "bos_token": "<s>",
3
- "eos_token": "</s>",
4
- "pad_token": "[PAD]",
5
- "unk_token": "[UNK]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": true,
19
+ "normalized": false,
20
+ "rstrip": true,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "[UNK]",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": true,
28
+ "single_word": false
29
+ }
30
  }
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "added_tokens_decoder": {
3
- "39": {
4
  "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
@@ -8,7 +8,7 @@
8
  "single_word": false,
9
  "special": false
10
  },
11
- "40": {
12
  "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
@@ -16,7 +16,7 @@
16
  "single_word": false,
17
  "special": false
18
  },
19
- "41": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
@@ -24,7 +24,7 @@
24
  "single_word": false,
25
  "special": true
26
  },
27
- "42": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
@@ -40,7 +40,7 @@
40
  "extra_special_tokens": {},
41
  "model_max_length": 1000000000000000019884624838656,
42
  "pad_token": "[PAD]",
43
- "processor_class": "Wav2Vec2Processor",
44
  "replace_word_delimiter_char": " ",
45
  "target_lang": null,
46
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
 
1
  {
2
  "added_tokens_decoder": {
3
+ "64": {
4
  "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
 
8
  "single_word": false,
9
  "special": false
10
  },
11
+ "65": {
12
  "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
 
16
  "single_word": false,
17
  "special": false
18
  },
19
+ "66": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
 
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "67": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
 
40
  "extra_special_tokens": {},
41
  "model_max_length": 1000000000000000019884624838656,
42
  "pad_token": "[PAD]",
43
+ "processor_class": "Wav2Vec2BertProcessor",
44
  "replace_word_delimiter_char": " ",
45
  "target_lang": null,
46
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a382a06010474ba7fcd3018791c974260b093b1619e654ea910979fa2f729840
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:483152d7aa715022a9aa872223d78a024157391bde94813dcaf228b4a12b762d
3
  size 5368
vocab.json CHANGED
@@ -1,43 +1,68 @@
1
  {
2
  "0": 1,
3
  "1": 2,
4
- "2": 3,
5
- "3": 4,
6
- "4": 5,
7
- "5": 6,
8
- "6": 7,
9
- "7": 8,
10
- "8": 9,
11
- "9": 10,
12
- "[PAD]": 40,
13
- "[UNK]": 39,
14
- "a": 11,
15
- "b": 12,
16
- "c": 13,
17
- "d": 14,
18
- "e": 15,
19
- "f": 16,
20
- "g": 17,
21
- "h": 18,
22
- "i": 19,
23
- "j": 20,
24
- "k": 21,
25
- "l": 22,
26
- "m": 23,
27
- "n": 24,
28
- "o": 25,
29
- "p": 26,
30
- "q": 27,
31
- "r": 28,
32
- "s": 29,
33
- "t": 30,
34
- "u": 31,
35
- "v": 32,
36
- "w": 33,
37
- "x": 34,
38
- "y": 35,
39
- "z": 36,
40
  "|": 0,
41
- "é": 37,
42
- "ì": 38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  }
 
1
  {
2
  "0": 1,
3
  "1": 2,
4
+ "9": 3,
5
+ "[PAD]": 65,
6
+ "[UNK]": 64,
7
+ "a": 4,
8
+ "b": 5,
9
+ "c": 6,
10
+ "d": 7,
11
+ "e": 8,
12
+ "f": 9,
13
+ "g": 10,
14
+ "h": 11,
15
+ "i": 12,
16
+ "j": 13,
17
+ "k": 14,
18
+ "l": 15,
19
+ "m": 16,
20
+ "n": 17,
21
+ "o": 18,
22
+ "p": 19,
23
+ "q": 20,
24
+ "r": 21,
25
+ "s": 22,
26
+ "t": 23,
27
+ "u": 24,
28
+ "v": 25,
29
+ "w": 26,
30
+ "x": 27,
31
+ "y": 28,
32
+ "z": 29,
 
 
 
 
 
 
 
33
  "|": 0,
34
+ "~": 30,
35
+ "à": 31,
36
+ "á": 32,
37
+ "ã": 33,
38
+ "è": 34,
39
+ "é": 35,
40
+ "ì": 36,
41
+ "í": 37,
42
+ "ò": 38,
43
+ "ó": 39,
44
+ "õ": 40,
45
+ "ù": 41,
46
+ "ú": 42,
47
+ "ā": 43,
48
+ "ē": 44,
49
+ "ĩ": 45,
50
+ "ŋ": 46,
51
+ "ũ": 47,
52
+ "ū": 48,
53
+ "ƒ": 49,
54
+ "ɔ": 50,
55
+ "ɖ": 51,
56
+ "ɛ": 52,
57
+ "ɣ": 53,
58
+ "ʋ": 54,
59
+ "̀": 55,
60
+ "́": 56,
61
+ "̃": 57,
62
+ "̄": 58,
63
+ "ͻ": 59,
64
+ "ε": 60,
65
+ "ѐ": 61,
66
+ "ẽ": 62,
67
+ "ὸ": 63
68
  }