tanmayplanet32 committed on
Commit
0c97244
·
1 Parent(s): 6d254f5

Add config data

Browse files
README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+
2
+ # Wav2vec2-Large-English
3
+
4
+ Fine-tuned [facebook/wav2vec2-large](https://huggingface.co/facebook/wav2vec2-large) on English using the [Common Voice](https://huggingface.co/datasets/common_voice) dataset.
5
+ When using this model, make sure that your speech input is sampled at 16 kHz.
6
+
config.json CHANGED
@@ -1,15 +1,13 @@
1
  {
2
- "_name_or_path": "tanmayplanet32/english-model",
3
  "activation_dropout": 0.05,
4
  "apply_spec_augment": true,
5
  "architectures": [
6
- "Wav2Vec2ForSpeechClassification"
7
  ],
8
  "attention_dropout": 0.1,
9
  "bos_token_id": 1,
10
- "codevector_dim": 256,
11
- "contrastive_logits_temperature": 0.1,
12
- "conv_bias": true,
13
  "conv_dim": [
14
  512,
15
  512,
@@ -39,64 +37,34 @@
39
  ],
40
  "ctc_loss_reduction": "mean",
41
  "ctc_zero_infinity": true,
42
- "diversity_loss_weight": 0.1,
43
- "do_stable_layer_norm": true,
44
  "eos_token_id": 2,
45
  "feat_extract_activation": "gelu",
46
  "feat_extract_dropout": 0.0,
47
- "feat_extract_norm": "layer",
48
  "feat_proj_dropout": 0.05,
49
- "feat_quantizer_dropout": 0.0,
50
- "final_dropout": 0.0,
51
- "finetuning_task": "wav2vec2_clf",
52
  "gradient_checkpointing": true,
53
  "hidden_act": "gelu",
54
  "hidden_dropout": 0.05,
 
55
  "hidden_size": 1024,
56
- "id2label": {
57
- "0": "anger",
58
- "1": "disgust",
59
- "2": "fear",
60
- "3": "happiness",
61
- "4": "sadness"
62
- },
63
  "initializer_range": 0.02,
64
  "intermediate_size": 4096,
65
- "label2id": {
66
- "anger": 0,
67
- "disgust": 1,
68
- "fear": 2,
69
- "happiness": 3,
70
- "sadness": 4
71
- },
72
  "layer_norm_eps": 1e-05,
73
  "layerdrop": 0.05,
74
- "mask_channel_length": 10,
75
- "mask_channel_min_space": 1,
76
- "mask_channel_other": 0.0,
77
- "mask_channel_prob": 0.0,
78
- "mask_channel_selection": "static",
79
  "mask_feature_length": 10,
80
  "mask_feature_prob": 0.0,
81
  "mask_time_length": 10,
82
- "mask_time_min_space": 1,
83
- "mask_time_other": 0.0,
84
  "mask_time_prob": 0.05,
85
- "mask_time_selection": "static",
86
  "model_type": "wav2vec2",
87
  "num_attention_heads": 16,
88
- "num_codevector_groups": 2,
89
- "num_codevectors_per_group": 320,
90
  "num_conv_pos_embedding_groups": 16,
91
  "num_conv_pos_embeddings": 128,
92
  "num_feat_extract_layers": 7,
93
  "num_hidden_layers": 24,
94
- "num_negatives": 100,
95
  "pad_token_id": 0,
96
  "pooling_mode": "mean",
97
- "problem_type": "single_label_classification",
98
- "proj_codevector_dim": 256,
99
- "torch_dtype": "float32",
100
- "transformers_version": "4.10.0.dev0",
101
  "vocab_size": 33
102
- }
 
1
  {
2
+ "_name_or_path": "facebook/wav2vec2-large",
3
  "activation_dropout": 0.05,
4
  "apply_spec_augment": true,
5
  "architectures": [
6
+ "Wav2Vec2ForCTC"
7
  ],
8
  "attention_dropout": 0.1,
9
  "bos_token_id": 1,
10
+ "conv_bias": false,
 
 
11
  "conv_dim": [
12
  512,
13
  512,
 
37
  ],
38
  "ctc_loss_reduction": "mean",
39
  "ctc_zero_infinity": true,
40
+ "do_stable_layer_norm": false,
 
41
  "eos_token_id": 2,
42
  "feat_extract_activation": "gelu",
43
  "feat_extract_dropout": 0.0,
44
+ "feat_extract_norm": "group",
45
  "feat_proj_dropout": 0.05,
46
+ "final_dropout": 0.1,
 
 
47
  "gradient_checkpointing": true,
48
  "hidden_act": "gelu",
49
  "hidden_dropout": 0.05,
50
+ "hidden_dropout_prob": 0.1,
51
  "hidden_size": 1024,
 
 
 
 
 
 
 
52
  "initializer_range": 0.02,
53
  "intermediate_size": 4096,
 
 
 
 
 
 
 
54
  "layer_norm_eps": 1e-05,
55
  "layerdrop": 0.05,
 
 
 
 
 
56
  "mask_feature_length": 10,
57
  "mask_feature_prob": 0.0,
58
  "mask_time_length": 10,
 
 
59
  "mask_time_prob": 0.05,
 
60
  "model_type": "wav2vec2",
61
  "num_attention_heads": 16,
 
 
62
  "num_conv_pos_embedding_groups": 16,
63
  "num_conv_pos_embeddings": 128,
64
  "num_feat_extract_layers": 7,
65
  "num_hidden_layers": 24,
 
66
  "pad_token_id": 0,
67
  "pooling_mode": "mean",
68
+ "transformers_version": "4.7.0.dev0",
 
 
 
69
  "vocab_size": 33
70
+ }
preprocessor_config.json CHANGED
@@ -6,4 +6,4 @@
6
  "padding_value": 0.0,
7
  "return_attention_mask": true,
8
  "sampling_rate": 16000
9
- }
 
6
  "padding_value": 0.0,
7
  "return_attention_mask": true,
8
  "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef32d54a3ebc911d64e7a8b8d04896f0957bbab4e3da4c6c0ae42ab901d6e4e7
3
+ size 1262022892
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "'": 5, "-": 6, "A": 7, "B": 8, "C": 9, "D": 10, "E": 11, "F": 12, "G": 13, "H": 14, "I": 15, "J": 16, "K": 17, "L": 18, "M": 19, "N": 20, "O": 21, "P": 22, "Q": 23, "R": 24, "S": 25, "T": 26, "U": 27, "V": 28, "W": 29, "X": 30, "Y": 31, "Z": 32}