Spaces:
Running
Running
Upload tiny_tts/utils/config.py with huggingface_hub
Browse files
- tiny_tts/utils/config.py +10 -9
tiny_tts/utils/config.py
CHANGED
|
@@ -5,18 +5,19 @@ HOP_LENGTH = 512
|
|
| 5 |
SEGMENT_FRAMES = 32
|
| 6 |
ADD_BLANK = True
|
| 7 |
SPEC_CHANNELS = FILTER_LENGTH // 2 + 1 # 1025
|
|
|
|
| 8 |
|
| 9 |
# Speakers
|
| 10 |
N_SPEAKERS = 1
|
| 11 |
-
SPK2ID = {"
|
| 12 |
|
| 13 |
-
# Model
|
| 14 |
MODEL_PARAMS = dict(
|
| 15 |
use_spk_conditioned_encoder=True,
|
| 16 |
use_noise_scaled_mas=True,
|
| 17 |
-
inter_channels=
|
| 18 |
-
hidden_channels=
|
| 19 |
-
filter_channels=
|
| 20 |
n_heads=2,
|
| 21 |
n_layers=3,
|
| 22 |
n_layers_trans_flow=3,
|
|
@@ -26,16 +27,16 @@ MODEL_PARAMS = dict(
|
|
| 26 |
resblock_kernel_sizes=[3, 7, 11],
|
| 27 |
resblock_dilation_sizes=[[1, 3, 5], [1, 3, 5], [1, 3, 5]],
|
| 28 |
upsample_rates=[8, 8, 2, 2, 2],
|
| 29 |
-
upsample_initial_channel=
|
| 30 |
upsample_kernel_sizes=[16, 16, 8, 2, 2],
|
| 31 |
n_layers_q=3,
|
| 32 |
use_spectral_norm=False,
|
| 33 |
-
gin_channels=
|
| 34 |
use_sdp=True,
|
| 35 |
mas_noise_scale_initial=0.01,
|
| 36 |
noise_scale_delta=2e-06,
|
| 37 |
)
|
| 38 |
|
| 39 |
# Language / Tone
|
| 40 |
-
NUM_LANGUAGES =
|
| 41 |
-
NUM_TONES =
|
|
|
|
| 5 |
SEGMENT_FRAMES = 32
|
| 6 |
ADD_BLANK = True
|
| 7 |
SPEC_CHANNELS = FILTER_LENGTH // 2 + 1 # 1025
|
| 8 |
+
N_MEL_CHANNELS = 128 # updated in new checkpoint
|
| 9 |
|
| 10 |
# Speakers
|
| 11 |
N_SPEAKERS = 1
|
| 12 |
+
SPK2ID = {"MALE": 0}
|
| 13 |
|
| 14 |
+
# Model — matches config.json for G_150000.pth (lighter version)
|
| 15 |
MODEL_PARAMS = dict(
|
| 16 |
use_spk_conditioned_encoder=True,
|
| 17 |
use_noise_scaled_mas=True,
|
| 18 |
+
inter_channels=32,
|
| 19 |
+
hidden_channels=32,
|
| 20 |
+
filter_channels=128,
|
| 21 |
n_heads=2,
|
| 22 |
n_layers=3,
|
| 23 |
n_layers_trans_flow=3,
|
|
|
|
| 27 |
resblock_kernel_sizes=[3, 7, 11],
|
| 28 |
resblock_dilation_sizes=[[1, 3, 5], [1, 3, 5], [1, 3, 5]],
|
| 29 |
upsample_rates=[8, 8, 2, 2, 2],
|
| 30 |
+
upsample_initial_channel=64,
|
| 31 |
upsample_kernel_sizes=[16, 16, 8, 2, 2],
|
| 32 |
n_layers_q=3,
|
| 33 |
use_spectral_norm=False,
|
| 34 |
+
gin_channels=128,
|
| 35 |
use_sdp=True,
|
| 36 |
mas_noise_scale_initial=0.01,
|
| 37 |
noise_scale_delta=2e-06,
|
| 38 |
)
|
| 39 |
|
| 40 |
# Language / Tone
|
| 41 |
+
NUM_LANGUAGES = 1
|
| 42 |
+
NUM_TONES = 6
|