Spaces:
Running
Running
File size: 1,074 Bytes
# Audio
# Frame / window sizes. SPEC_CHANNELS below is derived from FILTER_LENGTH,
# so FILTER_LENGTH must be defined first.
FILTER_LENGTH = 2048
HOP_LENGTH = 512
SEGMENT_FRAMES = 32

# Derived, not independent: 2048 // 2 + 1 == 1025.
SPEC_CHANNELS = 1 + FILTER_LENGTH // 2

SAMPLING_RATE = 44_100
N_MEL_CHANNELS = 128  # updated in new checkpoint

ADD_BLANK = True
# Speakers
# Speaker-name -> integer id lookup; a single entry is defined here.
SPK2ID = {
    "MALE": 0,
}
# Speaker count, kept as an explicit constant (one entry in SPK2ID above).
N_SPEAKERS = 1
# Model — matches config.json for G_150000.pth (lighter version)
# Hyperparameters keyed by name. Presumably these are forwarded as keyword
# arguments to the model constructor — verify against the caller.
# NOTE(review): the product of upsample_rates (8 * 8 * 2 * 2 * 2 = 512)
# equals HOP_LENGTH as set in this file — looks intentional; confirm before
# changing either value.
MODEL_PARAMS = {
    "use_spk_conditioned_encoder": True,
    "use_noise_scaled_mas": True,
    "inter_channels": 32,
    "hidden_channels": 32,
    "filter_channels": 128,
    "n_heads": 2,
    "n_layers": 3,
    "n_layers_trans_flow": 3,
    "kernel_size": 3,
    "p_dropout": 0.1,
    "resblock": "1",
    "resblock_kernel_sizes": [3, 7, 11],
    # One dilation list per entry of resblock_kernel_sizes (three of each).
    "resblock_dilation_sizes": [
        [1, 3, 5],
        [1, 3, 5],
        [1, 3, 5],
    ],
    # upsample_rates and upsample_kernel_sizes both have five entries.
    "upsample_rates": [8, 8, 2, 2, 2],
    "upsample_initial_channel": 64,
    "upsample_kernel_sizes": [16, 16, 8, 2, 2],
    "n_layers_q": 3,
    "use_spectral_norm": False,
    "gin_channels": 128,
    "use_sdp": True,
    "mas_noise_scale_initial": 0.01,
    "noise_scale_delta": 2e-06,
}
# Language / Tone
# Counts of distinct tone ids and language ids, respectively.
NUM_TONES = 6
NUM_LANGUAGES = 1
|