File size: 1,074 Bytes
c0c84cf
 
 
 
 
 
 
dcba272
c0c84cf
 
 
dcba272
c0c84cf
dcba272
c0c84cf
 
 
dcba272
 
 
c0c84cf
 
 
 
 
 
 
 
 
dcba272
c0c84cf
 
 
dcba272
c0c84cf
 
 
 
 
 
dcba272
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Audio
SAMPLING_RATE: int = 44100
FILTER_LENGTH: int = 2048
HOP_LENGTH: int = 512
SEGMENT_FRAMES: int = 32
ADD_BLANK: bool = True
# Derived from FILTER_LENGTH rather than hard-coded: 2048 // 2 + 1 == 1025.
SPEC_CHANNELS: int = 1 + FILTER_LENGTH // 2
# Updated in new checkpoint.
N_MEL_CHANNELS: int = 128

# Speakers
# Speaker name -> integer id; the single entry here matches N_SPEAKERS below.
SPK2ID: dict = {
    "MALE": 0,
}
N_SPEAKERS: int = 1

# Model — matches config.json for G_150000.pth (lighter version)
MODEL_PARAMS: dict = {
    "use_spk_conditioned_encoder": True,
    "use_noise_scaled_mas": True,
    "inter_channels": 32,
    "hidden_channels": 32,
    "filter_channels": 128,
    "n_heads": 2,
    "n_layers": 3,
    "n_layers_trans_flow": 3,
    "kernel_size": 3,
    "p_dropout": 0.1,
    "resblock": "1",
    "resblock_kernel_sizes": [3, 7, 11],
    # Three independent inner lists (one per resblock kernel size above).
    "resblock_dilation_sizes": [
        [1, 3, 5],
        [1, 3, 5],
        [1, 3, 5],
    ],
    # 8 * 8 * 2 * 2 * 2 == 512, the same value as HOP_LENGTH in this file.
    # NOTE(review): presumably these must stay equal — confirm before changing either.
    "upsample_rates": [8, 8, 2, 2, 2],
    "upsample_initial_channel": 64,
    "upsample_kernel_sizes": [16, 16, 8, 2, 2],
    "n_layers_q": 3,
    "use_spectral_norm": False,
    "gin_channels": 128,
    "use_sdp": True,
    "mas_noise_scale_initial": 0.01,
    "noise_scale_delta": 2e-6,
}

# Language / Tone
NUM_TONES: int = 6
NUM_LANGUAGES: int = 1