# Spaces: Running
# Static configuration constants: audio front-end settings, the speaker
# table, model hyper-parameters and language/tone counts.

# Audio
SAMPLING_RATE = 44100
FILTER_LENGTH = 2048
HOP_LENGTH = 512
SEGMENT_FRAMES = 32
ADD_BLANK = True
# NOTE(review): presumably the number of one-sided STFT bins for
# FILTER_LENGTH -- confirm against the spectrogram code.
SPEC_CHANNELS = FILTER_LENGTH // 2 + 1  # 1025
N_MEL_CHANNELS = 128  # updated in new checkpoint

# Speakers
N_SPEAKERS = 1
# Speaker name -> integer id. Holds exactly N_SPEAKERS entries; keep the
# two in sync when adding a speaker.
SPK2ID = {"MALE": 0}

# Model — matches config.json for G_150000.pth (lighter version)
# NOTE(review): these look like keyword arguments for the synthesizer
# constructor -- verify against the code that consumes MODEL_PARAMS.
MODEL_PARAMS = dict(
    use_spk_conditioned_encoder=True,
    use_noise_scaled_mas=True,
    inter_channels=32,
    hidden_channels=32,
    filter_channels=128,
    n_heads=2,
    n_layers=3,
    n_layers_trans_flow=3,
    kernel_size=3,
    p_dropout=0.1,
    resblock="1",
    resblock_kernel_sizes=[3, 7, 11],
    resblock_dilation_sizes=[[1, 3, 5], [1, 3, 5], [1, 3, 5]],
    # The product of these rates is 8 * 8 * 2 * 2 * 2 = 512, which equals
    # HOP_LENGTH. NOTE(review): presumably a hard requirement of the
    # decoder -- confirm before changing either value.
    upsample_rates=[8, 8, 2, 2, 2],
    upsample_initial_channel=64,
    # One kernel size per entry in upsample_rates (both lists have 5 items).
    upsample_kernel_sizes=[16, 16, 8, 2, 2],
    n_layers_q=3,
    use_spectral_norm=False,
    gin_channels=128,
    use_sdp=True,
    mas_noise_scale_initial=0.01,
    noise_scale_delta=2e-06,
)

# Language / Tone
NUM_LANGUAGES = 1
NUM_TONES = 6