mlinmg committed on
Commit
2e771d9
·
verified ·
1 Parent(s): 2ebc88c

Upload 2 files

Browse files
Files changed (2) hide show
  1. config.json +13 -9
  2. xtts2_config.py +29 -18
config.json CHANGED
@@ -1,14 +1,7 @@
1
  {
2
- "_name_or_path": "AstraMindAI/xtts2-gpt",
3
  "architectures": [
4
- "XttsGPT"
5
  ],
6
- "torch_dtype": "float32",
7
- "auto_map": {
8
- "AutoConfig": "AstraMindAI/xtts2-gpt--gpt_config.XTTSGPTConfig",
9
- "AutoModelForCausalLM": "AstraMindAI/xtts2-gpt--xtts2_gpt_modeling.XttsGPT",
10
- "AutoTokenizer": "AstraMindAI/xtts2-gpt--tokenizer.XTTSTokenizerFast"
11
- },
12
  "audio_config": {
13
  "fmax": 8000,
14
  "fmin": 0,
@@ -21,6 +14,11 @@
21
  "sample_rate": 22050,
22
  "win_length": 1024
23
  },
 
 
 
 
 
24
  "cond_d_vector_in_each_upsampling_layer": true,
25
  "d_vector_dim": 512,
26
  "decoder_input_dim": 1024,
@@ -33,12 +31,18 @@
33
  "_attn_implementation_autoset": false,
34
  "_name_or_path": "",
35
  "add_cross_attention": false,
36
- "architectures": null,
 
 
37
  "audio_config": {
38
  "mel_channels": 80,
39
  "output_sample_rate": 24000,
40
  "sample_rate": 22050
41
  },
 
 
 
 
42
  "bad_words_ids": null,
43
  "begin_suppress_tokens": null,
44
  "bos_token_id": null,
 
1
  {
 
2
  "architectures": [
3
+ "Xtts"
4
  ],
 
 
 
 
 
 
5
  "audio_config": {
6
  "fmax": 8000,
7
  "fmin": 0,
 
14
  "sample_rate": 22050,
15
  "win_length": 1024
16
  },
17
+ "auto_map": {
18
+ "AutoConfig": "AstraMindAI/xtts2--xtts2_config.XTTSConfig",
19
+ "AutoModelForCausalLM": "AstraMindAI/xtts2--xtts2_modeling.Xtts",
20
+ "AutoTokenizer": "AstraMindAI/xtts2--tokenizer.XTTSTokenizerFast"
21
+ },
22
  "cond_d_vector_in_each_upsampling_layer": true,
23
  "d_vector_dim": 512,
24
  "decoder_input_dim": 1024,
 
31
  "_attn_implementation_autoset": false,
32
  "_name_or_path": "",
33
  "add_cross_attention": false,
34
+ "architectures": [
35
+ "XttsGPT"
36
+ ],
37
  "audio_config": {
38
  "mel_channels": 80,
39
  "output_sample_rate": 24000,
40
  "sample_rate": 22050
41
  },
42
+ "auto_map": {
43
+ "AutoConfig": "AstraMindAI/xtts2-gpt--gpt_config.XTTSGPTConfig",
44
+ "AutoModelForCausalLM": "AstraMindAI/xtts2-gpt--xtts2_gpt_modeling.XttsGPT"
45
+ },
46
  "bad_words_ids": null,
47
  "begin_suppress_tokens": null,
48
  "bos_token_id": null,
xtts2_config.py CHANGED
@@ -13,6 +13,20 @@ class GPTAudioConfig:
13
  sample_rate: int = 22050
14
  output_sample_rate: int = 24000
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  class XTTSGPTConfig(PretrainedConfig):
18
  """Configuration class for the GPT component of XTTS."""
@@ -63,11 +77,16 @@ class XTTSGPTConfig(PretrainedConfig):
63
 
64
  # Size settings for the decoder
65
  decoder_input_dim: int = 1024,
66
-
 
 
 
 
67
  **kwargs
68
  ):
69
  super().__init__(**kwargs)
70
-
 
71
  self.audio_config = GPTAudioConfig(
72
  **audio_config if audio_config is not None else {}
73
  )
@@ -116,20 +135,6 @@ class XTTSGPTConfig(PretrainedConfig):
116
  """Create a config from a dictionary."""
117
  return cls(**config_dict)
118
 
119
- @dataclass
120
- class XTTSAudioConfig:
121
- """Configuration for audio processing parameters"""
122
- sample_rate: int = 22050
123
- output_sample_rate: int = 24000
124
- mel_channels: int = 80
125
- hop_length: int = 256
126
- win_length: int = 1024
127
- n_fft: int = 1024
128
- fmin: int = 0
129
- fmax: int = 8000
130
- power: float = 1.0
131
- mel_norms_file: Optional[str] = None
132
-
133
 
134
  class XTTSConfig(PretrainedConfig):
135
  """Configuration class for XTTS model components except GPT."""
@@ -161,11 +166,17 @@ class XTTSConfig(PretrainedConfig):
161
 
162
  # GPT configuration
163
  gpt_config: Optional[Dict] = None,
164
-
 
 
 
 
 
165
  **kwargs
166
  ):
167
  super().__init__(**kwargs)
168
-
 
169
  # Initialize audio config
170
  self.audio_config = XTTSAudioConfig(
171
  **audio_config if audio_config is not None else {}
 
13
  sample_rate: int = 22050
14
  output_sample_rate: int = 24000
15
 
16
+ @dataclass
17
+ class XTTSAudioConfig:
18
+ """Configuration for audio processing parameters"""
19
+ sample_rate: int = 22050
20
+ output_sample_rate: int = 24000
21
+ mel_channels: int = 80
22
+ hop_length: int = 256
23
+ win_length: int = 1024
24
+ n_fft: int = 1024
25
+ fmin: int = 0
26
+ fmax: int = 8000
27
+ power: float = 1.0
28
+ mel_norms_file: Optional[str] = None
29
+
30
 
31
  class XTTSGPTConfig(PretrainedConfig):
32
  """Configuration class for the GPT component of XTTS."""
 
77
 
78
  # Size settings for the decoder
79
  decoder_input_dim: int = 1024,
80
+ architectures=["XttsGPT"],
81
+ auto_map = {
82
+ "AutoConfig": "AstraMindAI/xtts2-gpt--gpt_config.XTTSGPTConfig",
83
+ "AutoModelForCausalLM": "AstraMindAI/xtts2-gpt--xtts2_gpt_modeling.XttsGPT",
84
+ },
85
  **kwargs
86
  ):
87
  super().__init__(**kwargs)
88
+ self.architectures = architectures
89
+ self.auto_map = auto_map
90
  self.audio_config = GPTAudioConfig(
91
  **audio_config if audio_config is not None else {}
92
  )
 
135
  """Create a config from a dictionary."""
136
  return cls(**config_dict)
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
  class XTTSConfig(PretrainedConfig):
140
  """Configuration class for XTTS model components except GPT."""
 
166
 
167
  # GPT configuration
168
  gpt_config: Optional[Dict] = None,
169
+ architectures=["Xtts"],
170
+ auto_map = {
171
+ "AutoConfig": "AstraMindAI/xtts2--xtts2_config.XTTSConfig",
172
+ "AutoModelForCausalLM": "AstraMindAI/xtts2--xtts2_modeling.Xtts",
173
+ "AutoTokenizer": "AstraMindAI/xtts2--tokenizer.XTTSTokenizerFast"
174
+ },
175
  **kwargs
176
  ):
177
  super().__init__(**kwargs)
178
+ self.architectures = architectures
179
+ self.auto_map = auto_map
180
  # Initialize audio config
181
  self.audio_config = XTTSAudioConfig(
182
  **audio_config if audio_config is not None else {}