Instructions to use bezzam/xcodec2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use bezzam/xcodec2 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("feature-extraction", model="bezzam/xcodec2")
```

```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("bezzam/xcodec2", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
Upload model
Browse files
- config.json +9 -2
config.json
CHANGED
|
@@ -17,6 +17,7 @@
|
|
| 17 |
"head_dim": 64,
|
| 18 |
"hidden_act": "silu",
|
| 19 |
"initializer_range": 0.02,
|
|
|
|
| 20 |
"max_position_embeddings": 4096,
|
| 21 |
"model_type": "xcodec2",
|
| 22 |
"num_attention_heads": 16,
|
|
@@ -25,7 +26,10 @@
|
|
| 25 |
"num_quantizers": 1,
|
| 26 |
"resnet_dropout": 0.1,
|
| 27 |
"rms_norm_eps": 1e-06,
|
| 28 |
-
"
|
|
|
|
|
|
|
|
|
|
| 29 |
"sampling_rate": 16000,
|
| 30 |
"semantic_model_config": {
|
| 31 |
"_name_or_path": "facebook/w2v-bert-2.0",
|
|
@@ -39,6 +43,7 @@
|
|
| 39 |
"Wav2Vec2BertModel"
|
| 40 |
],
|
| 41 |
"attention_dropout": 0.0,
|
|
|
|
| 42 |
"classifier_proj_size": 768,
|
| 43 |
"codevector_dim": 768,
|
| 44 |
"conformer_conv_dropout": 0.1,
|
|
@@ -48,6 +53,7 @@
|
|
| 48 |
"ctc_zero_infinity": false,
|
| 49 |
"diversity_loss_weight": 0.1,
|
| 50 |
"dtype": "float32",
|
|
|
|
| 51 |
"feat_proj_dropout": 0.0,
|
| 52 |
"feat_quantizer_dropout": 0.0,
|
| 53 |
"feature_projection_input_dim": 160,
|
|
@@ -76,6 +82,7 @@
|
|
| 76 |
"num_negatives": 100,
|
| 77 |
"output_hidden_size": 1024,
|
| 78 |
"output_hidden_states": true,
|
|
|
|
| 79 |
"position_embeddings_type": "relative_key",
|
| 80 |
"proj_codevector_dim": 768,
|
| 81 |
"right_max_position_embeddings": 8,
|
|
@@ -107,7 +114,7 @@
|
|
| 107 |
"xvector_output_dim": 512
|
| 108 |
},
|
| 109 |
"semantic_model_id": "facebook/w2v-bert-2.0",
|
| 110 |
-
"transformers_version": "
|
| 111 |
"vq_dim": 2048,
|
| 112 |
"vq_levels": [
|
| 113 |
4,
|
|
|
|
| 17 |
"head_dim": 64,
|
| 18 |
"hidden_act": "silu",
|
| 19 |
"initializer_range": 0.02,
|
| 20 |
+
"istft_padding": "same",
|
| 21 |
"max_position_embeddings": 4096,
|
| 22 |
"model_type": "xcodec2",
|
| 23 |
"num_attention_heads": 16,
|
|
|
|
| 26 |
"num_quantizers": 1,
|
| 27 |
"resnet_dropout": 0.1,
|
| 28 |
"rms_norm_eps": 1e-06,
|
| 29 |
+
"rope_parameters": {
|
| 30 |
+
"rope_theta": 10000.0,
|
| 31 |
+
"rope_type": "default"
|
| 32 |
+
},
|
| 33 |
"sampling_rate": 16000,
|
| 34 |
"semantic_model_config": {
|
| 35 |
"_name_or_path": "facebook/w2v-bert-2.0",
|
|
|
|
| 43 |
"Wav2Vec2BertModel"
|
| 44 |
],
|
| 45 |
"attention_dropout": 0.0,
|
| 46 |
+
"bos_token_id": 1,
|
| 47 |
"classifier_proj_size": 768,
|
| 48 |
"codevector_dim": 768,
|
| 49 |
"conformer_conv_dropout": 0.1,
|
|
|
|
| 53 |
"ctc_zero_infinity": false,
|
| 54 |
"diversity_loss_weight": 0.1,
|
| 55 |
"dtype": "float32",
|
| 56 |
+
"eos_token_id": 2,
|
| 57 |
"feat_proj_dropout": 0.0,
|
| 58 |
"feat_quantizer_dropout": 0.0,
|
| 59 |
"feature_projection_input_dim": 160,
|
|
|
|
| 82 |
"num_negatives": 100,
|
| 83 |
"output_hidden_size": 1024,
|
| 84 |
"output_hidden_states": true,
|
| 85 |
+
"pad_token_id": 0,
|
| 86 |
"position_embeddings_type": "relative_key",
|
| 87 |
"proj_codevector_dim": 768,
|
| 88 |
"right_max_position_embeddings": 8,
|
|
|
|
| 114 |
"xvector_output_dim": 512
|
| 115 |
},
|
| 116 |
"semantic_model_id": "facebook/w2v-bert-2.0",
|
| 117 |
+
"transformers_version": "5.3.0.dev0",
|
| 118 |
"vq_dim": 2048,
|
| 119 |
"vq_levels": [
|
| 120 |
4,
|