backtracking committed on
Commit
dcba272
·
verified ·
1 Parent(s): a0d48a5

Upload tiny_tts/utils/config.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. tiny_tts/utils/config.py +10 -9
tiny_tts/utils/config.py CHANGED
@@ -5,18 +5,19 @@ HOP_LENGTH = 512
5
  SEGMENT_FRAMES = 32
6
  ADD_BLANK = True
7
  SPEC_CHANNELS = FILTER_LENGTH // 2 + 1 # 1025
 
8
 
9
  # Speakers
10
  N_SPEAKERS = 1
11
- SPK2ID = {"female": 0}
12
 
13
- # Model
14
  MODEL_PARAMS = dict(
15
  use_spk_conditioned_encoder=True,
16
  use_noise_scaled_mas=True,
17
- inter_channels=80,
18
- hidden_channels=80,
19
- filter_channels=320,
20
  n_heads=2,
21
  n_layers=3,
22
  n_layers_trans_flow=3,
@@ -26,16 +27,16 @@ MODEL_PARAMS = dict(
26
  resblock_kernel_sizes=[3, 7, 11],
27
  resblock_dilation_sizes=[[1, 3, 5], [1, 3, 5], [1, 3, 5]],
28
  upsample_rates=[8, 8, 2, 2, 2],
29
- upsample_initial_channel=256,
30
  upsample_kernel_sizes=[16, 16, 8, 2, 2],
31
  n_layers_q=3,
32
  use_spectral_norm=False,
33
- gin_channels=80,
34
  use_sdp=True,
35
  mas_noise_scale_initial=0.01,
36
  noise_scale_delta=2e-06,
37
  )
38
 
39
  # Language / Tone
40
- NUM_LANGUAGES = 10
41
- NUM_TONES = 16
 
5
  SEGMENT_FRAMES = 32
6
  ADD_BLANK = True
7
  SPEC_CHANNELS = FILTER_LENGTH // 2 + 1 # 1025
8
+ N_MEL_CHANNELS = 128 # updated in new checkpoint
9
 
10
  # Speakers
11
  N_SPEAKERS = 1
12
+ SPK2ID = {"MALE": 0}
13
 
14
+ # Model — matches config.json for G_150000.pth (lighter version)
15
  MODEL_PARAMS = dict(
16
  use_spk_conditioned_encoder=True,
17
  use_noise_scaled_mas=True,
18
+ inter_channels=32,
19
+ hidden_channels=32,
20
+ filter_channels=128,
21
  n_heads=2,
22
  n_layers=3,
23
  n_layers_trans_flow=3,
 
27
  resblock_kernel_sizes=[3, 7, 11],
28
  resblock_dilation_sizes=[[1, 3, 5], [1, 3, 5], [1, 3, 5]],
29
  upsample_rates=[8, 8, 2, 2, 2],
30
+ upsample_initial_channel=64,
31
  upsample_kernel_sizes=[16, 16, 8, 2, 2],
32
  n_layers_q=3,
33
  use_spectral_norm=False,
34
+ gin_channels=128,
35
  use_sdp=True,
36
  mas_noise_scale_initial=0.01,
37
  noise_scale_delta=2e-06,
38
  )
39
 
40
  # Language / Tone
41
+ NUM_LANGUAGES = 1
42
+ NUM_TONES = 6