# File-viewer metadata (not part of the config): original size 1,112 bytes, id 2e6a07d, 68 lines.
## Base audio configs
normalize: true  # zscore input waveforms
sr: 16000
ft_sr: 50

## Source feature configs
crepe_model: full
device: cuda
fmax: 550
fmin: 50
pitch_q: 2
periodicity_threshold: 0.0
reflect_loudness: false
loudness_threshold: 0.05
use_penn: false

## Articulatory Inversion configs
speech_model: microsoft/wavlm-large
spk_ft_size: 1024
target_layer: 9
freqcut: 10

## Hifi-GAN configs
# generator_configs is a nested mapping; every key indented below belongs to it.
# NOTE(review): the nesting was reconstructed after indentation was lost.
# The keys from `bias` through `use_weight_norm` are in alphabetical order and
# `spk_emb_size` would otherwise be a duplicate top-level key, so the mapping is
# assumed to end right before `pitch_offset` -- confirm against the consumer.
generator_configs:
  bias: true
  channels: 512
  in_channels: 14
  kernel_size: 7
  nonlinear_activation: LeakyReLU
  nonlinear_activation_params:
    negative_slope: 0.1
  out_channels: 1
  # Three dilation lists and three kernel sizes below; presumably one dilation
  # list per kernel size -- TODO confirm.
  resblock_dilations:
    - [1, 3, 5]
    - [1, 3, 5]
    - [1, 3, 5]
  resblock_kernel_sizes: [3, 7, 11]
  # Same value as the top-level spk_emb_size in the speaker encoder section;
  # presumably the two must be kept in sync -- verify against the loader.
  spk_emb_size: 64
  # Each kernel size here is twice the upsample scale at the same position.
  upsample_kernel_sizes: [16, 10, 8, 4]
  # Product of the scales is 8 * 5 * 4 * 2 = 320, which equals sr / ft_sr
  # (16000 / 50).
  upsample_scales: [8, 5, 4, 2]
  use_additional_convs: true
  use_weight_norm: true
pitch_offset: 50
pitch_rescale: 0.01
pitch_axis: 12

## Speaker encoder configs
spk_emb_size: 64
spk_target_layer: 0

## Checkpoint Info
# All checkpoint entries are explicitly null in this file.
all_ckpt: null
linear_model_path: null
generator_ckpt: null
spk_ft_ckpt: null