Upload 2 files
Browse files
- config.json +3 -0
- xtts2_config.py +6 -1
config.json
CHANGED
|
@@ -30,10 +30,12 @@
|
|
| 30 |
"gpt_config": {
|
| 31 |
"_attn_implementation_autoset": false,
|
| 32 |
"_name_or_path": "",
|
|
|
|
| 33 |
"add_cross_attention": false,
|
| 34 |
"architectures": [
|
| 35 |
"XttsGPT"
|
| 36 |
],
|
|
|
|
| 37 |
"audio_config": {
|
| 38 |
"mel_channels": 80,
|
| 39 |
"output_sample_rate": 24000,
|
|
@@ -83,6 +85,7 @@
|
|
| 83 |
"max_text_tokens": 402,
|
| 84 |
"min_length": 0,
|
| 85 |
"model_type": "xtts_gpt",
|
|
|
|
| 86 |
"no_repeat_ngram_size": 0,
|
| 87 |
"num_attention_heads": 16,
|
| 88 |
"num_audio_tokens": 1026,
|
|
|
|
| 30 |
"gpt_config": {
|
| 31 |
"_attn_implementation_autoset": false,
|
| 32 |
"_name_or_path": "",
|
| 33 |
+
"activation_function": "gelu",
|
| 34 |
"add_cross_attention": false,
|
| 35 |
"architectures": [
|
| 36 |
"XttsGPT"
|
| 37 |
],
|
| 38 |
+
"attn_pdrop": 0.1,
|
| 39 |
"audio_config": {
|
| 40 |
"mel_channels": 80,
|
| 41 |
"output_sample_rate": 24000,
|
|
|
|
| 85 |
"max_text_tokens": 402,
|
| 86 |
"min_length": 0,
|
| 87 |
"model_type": "xtts_gpt",
|
| 88 |
+
"n_inner": 4098,
|
| 89 |
"no_repeat_ngram_size": 0,
|
| 90 |
"num_attention_heads": 16,
|
| 91 |
"num_audio_tokens": 1026,
|
xtts2_config.py
CHANGED
|
@@ -36,6 +36,7 @@ class XTTSGPTConfig(PretrainedConfig):
|
|
| 36 |
self,
|
| 37 |
# Model architecture
|
| 38 |
hidden_size: int = 1024, # gpt_n_model_channels in original
|
|
|
|
| 39 |
num_hidden_layers: int = 30, # gpt_layers in original
|
| 40 |
num_attention_heads: int = 16, # gpt_n_heads in original
|
| 41 |
|
|
@@ -82,6 +83,8 @@ class XTTSGPTConfig(PretrainedConfig):
|
|
| 82 |
"AutoConfig": "AstraMindAI/xtts2-gpt--gpt_config.XTTSGPTConfig",
|
| 83 |
"AutoModelForCausalLM": "AstraMindAI/xtts2-gpt--xtts2_gpt_modeling.XttsGPT",
|
| 84 |
},
|
|
|
|
|
|
|
| 85 |
**kwargs
|
| 86 |
):
|
| 87 |
super().__init__(**kwargs)
|
|
@@ -90,8 +93,10 @@ class XTTSGPTConfig(PretrainedConfig):
|
|
| 90 |
self.audio_config = GPTAudioConfig(
|
| 91 |
**audio_config if audio_config is not None else {}
|
| 92 |
)
|
| 93 |
-
|
|
|
|
| 94 |
self.hidden_size = hidden_size
|
|
|
|
| 95 |
self.num_hidden_layers = num_hidden_layers
|
| 96 |
self.num_attention_heads = num_attention_heads
|
| 97 |
|
|
|
|
| 36 |
self,
|
| 37 |
# Model architecture
|
| 38 |
hidden_size: int = 1024, # gpt_n_model_channels in original
|
| 39 |
+
n_inner: int = 4098,
|
| 40 |
num_hidden_layers: int = 30, # gpt_layers in original
|
| 41 |
num_attention_heads: int = 16, # gpt_n_heads in original
|
| 42 |
|
|
|
|
| 83 |
"AutoConfig": "AstraMindAI/xtts2-gpt--gpt_config.XTTSGPTConfig",
|
| 84 |
"AutoModelForCausalLM": "AstraMindAI/xtts2-gpt--xtts2_gpt_modeling.XttsGPT",
|
| 85 |
},
|
| 86 |
+
activation_function: str = "gelu",
|
| 87 |
+
attn_pdrop: float = 0.1,
|
| 88 |
**kwargs
|
| 89 |
):
|
| 90 |
super().__init__(**kwargs)
|
|
|
|
| 93 |
self.audio_config = GPTAudioConfig(
|
| 94 |
**audio_config if audio_config is not None else {}
|
| 95 |
)
|
| 96 |
+
self.activation_function = activation_function
|
| 97 |
+
self.attn_pdrop = attn_pdrop
|
| 98 |
self.hidden_size = hidden_size
|
| 99 |
+
self.n_inner = n_inner
|
| 100 |
self.num_hidden_layers = num_hidden_layers
|
| 101 |
self.num_attention_heads = num_attention_heads
|
| 102 |
|