|
|
|
|
|
normalize: true |
|
|
sr: 16000 |
|
|
ft_sr: 50 |
|
|
|
|
|
crepe_model: full |
|
|
device: cuda |
|
|
fmax: 550 |
|
|
fmin: 50 |
|
|
pitch_q: 2 |
|
|
periodicity_threshold: 0.0 |
|
|
reflect_loudness: false |
|
|
loudness_threshold: 0.05 |
|
|
use_penn: false |
|
|
|
|
|
speech_model: microsoft/wavlm-large |
|
|
spk_ft_size: 1024 |
|
|
target_layer: 9 |
|
|
freqcut: 10 |
|
|
|
|
|
generator_configs: |
|
|
bias: true |
|
|
channels: 512 |
|
|
in_channels: 14 |
|
|
kernel_size: 7 |
|
|
nonlinear_activation: LeakyReLU |
|
|
nonlinear_activation_params: |
|
|
negative_slope: 0.1 |
|
|
out_channels: 1 |
|
|
resblock_dilations: |
|
|
- - 1 |
|
|
- 3 |
|
|
- 5 |
|
|
- - 1 |
|
|
- 3 |
|
|
- 5 |
|
|
- - 1 |
|
|
- 3 |
|
|
- 5 |
|
|
resblock_kernel_sizes: |
|
|
- 3 |
|
|
- 7 |
|
|
- 11 |
|
|
spk_emb_size: 64 |
|
|
upsample_kernel_sizes: |
|
|
- 16 |
|
|
- 10 |
|
|
- 8 |
|
|
- 4 |
|
|
upsample_scales: |
|
|
- 8 |
|
|
- 5 |
|
|
- 4 |
|
|
- 2 |
|
|
use_additional_convs: true |
|
|
use_weight_norm: true |
|
|
pitch_offset: 50 |
|
|
pitch_rescale: 0.01 |
|
|
pitch_axis: 12 |
|
|
|
|
|
spk_emb_size: 64 |
|
|
spk_target_layer: 0 |
|
|
|
|
|
all_ckpt: null |
|
|
linear_model_path: null |
|
|
generator_ckpt: null |
|
|
spk_ft_ckpt: null |
|
|
|