Upload model
Browse files
- config.json +9 -2
config.json
CHANGED
|
@@ -28,7 +28,6 @@
|
|
| 28 |
"architectures": [
|
| 29 |
"XcodecModel"
|
| 30 |
],
|
| 31 |
-
"audio_channels": 1,
|
| 32 |
"block_dilations": [
|
| 33 |
1,
|
| 34 |
1
|
|
@@ -52,8 +51,12 @@
|
|
| 52 |
"output_dim": 256,
|
| 53 |
"sample_rate": 16000,
|
| 54 |
"semantic_model_config": {
|
|
|
|
| 55 |
"activation_dropout": 0.1,
|
| 56 |
"apply_spec_augment": true,
|
|
|
|
|
|
|
|
|
|
| 57 |
"attention_dropout": 0.1,
|
| 58 |
"classifier_proj_size": 256,
|
| 59 |
"conv_bias": false,
|
|
@@ -89,12 +92,15 @@
|
|
| 89 |
"ctc_zero_infinity": false,
|
| 90 |
"do_stable_layer_norm": false,
|
| 91 |
"feat_extract_activation": "gelu",
|
|
|
|
| 92 |
"feat_extract_norm": "group",
|
| 93 |
-
"feat_proj_dropout": 0.0,
|
| 94 |
"feat_proj_layer_norm": true,
|
| 95 |
"final_dropout": 0.1,
|
|
|
|
| 96 |
"hidden_act": "gelu",
|
| 97 |
"hidden_dropout": 0.1,
|
|
|
|
| 98 |
"hidden_size": 768,
|
| 99 |
"initializer_range": 0.02,
|
| 100 |
"intermediate_size": 3072,
|
|
@@ -112,6 +118,7 @@
|
|
| 112 |
"num_conv_pos_embeddings": 128,
|
| 113 |
"num_feat_extract_layers": 7,
|
| 114 |
"num_hidden_layers": 12,
|
|
|
|
| 115 |
"use_weighted_layer_sum": false,
|
| 116 |
"vocab_size": 32
|
| 117 |
},
|
|
|
|
| 28 |
"architectures": [
|
| 29 |
"XcodecModel"
|
| 30 |
],
|
|
|
|
| 31 |
"block_dilations": [
|
| 32 |
1,
|
| 33 |
1
|
|
|
|
| 51 |
"output_dim": 256,
|
| 52 |
"sample_rate": 16000,
|
| 53 |
"semantic_model_config": {
|
| 54 |
+
"_name_or_path": "facebook/hubert-base-ls960",
|
| 55 |
"activation_dropout": 0.1,
|
| 56 |
"apply_spec_augment": true,
|
| 57 |
+
"architectures": [
|
| 58 |
+
"HubertModel"
|
| 59 |
+
],
|
| 60 |
"attention_dropout": 0.1,
|
| 61 |
"classifier_proj_size": 256,
|
| 62 |
"conv_bias": false,
|
|
|
|
| 92 |
"ctc_zero_infinity": false,
|
| 93 |
"do_stable_layer_norm": false,
|
| 94 |
"feat_extract_activation": "gelu",
|
| 95 |
+
"feat_extract_dropout": 0.0,
|
| 96 |
"feat_extract_norm": "group",
|
| 97 |
+
"feat_proj_dropout": 0.1,
|
| 98 |
"feat_proj_layer_norm": true,
|
| 99 |
"final_dropout": 0.1,
|
| 100 |
+
"gradient_checkpointing": false,
|
| 101 |
"hidden_act": "gelu",
|
| 102 |
"hidden_dropout": 0.1,
|
| 103 |
+
"hidden_dropout_prob": 0.1,
|
| 104 |
"hidden_size": 768,
|
| 105 |
"initializer_range": 0.02,
|
| 106 |
"intermediate_size": 3072,
|
|
|
|
| 118 |
"num_conv_pos_embeddings": 128,
|
| 119 |
"num_feat_extract_layers": 7,
|
| 120 |
"num_hidden_layers": 12,
|
| 121 |
+
"tokenizer_class": "Wav2Vec2CTCTokenizer",
|
| 122 |
"use_weighted_layer_sum": false,
|
| 123 |
"vocab_size": 32
|
| 124 |
},
|