# Speech-Articulatory-Coding / model_multiling.yaml
# cheoljun95's picture
# Upload 4 files
# 2e6a07d verified
## Base audio configs
# Z-score normalize input waveforms.
normalize: true
# Presumably the audio sample rate in Hz -- TODO confirm against the loader.
sr: 16000
# Presumably the feature frame rate in Hz -- TODO confirm. With the values in
# this file, sr / ft_sr = 320, which equals the product of the
# upsample_scales entries (8 * 5 * 4 * 2) in the Hifi-GAN settings.
ft_sr: 50
## Source feature configs
# NOTE(review): the per-key notes in this section are inferred from the key
# names only; confirm each against the code that reads this file.
# Presumably selects the CREPE pitch-model variant.
crepe_model: full
# Presumably a compute-device string; fixed to "cuda" here.
# NOTE(review): confirm whether callers can override it on CPU-only machines.
device: cuda
# Presumably the upper and lower bounds of the pitch search range (Hz?).
fmax: 550
fmin: 50
pitch_q: 2
# NOTE(review): a threshold of 0.0 presumably disables periodicity gating --
# confirm.
periodicity_threshold: 0.0
reflect_loudness: false
loudness_threshold: 0.05
# Presumably switches pitch tracking to an alternative ("penn") backend;
# disabled in this file.
use_penn: false
## Articulatory Inversion configs
# Presumably a pretrained-model identifier for the speech encoder -- confirm.
speech_model: microsoft/wavlm-large
# NOTE(review): presumably the feature width taken from speech_model; verify
# that it matches the encoder's hidden size.
spk_ft_size: 1024
# Presumably the index of the encoder layer whose output is used -- confirm
# whether the index is 0- or 1-based.
target_layer: 9
# NOTE(review): meaning and units are not visible here (a cutoff frequency in
# Hz?) -- confirm.
freqcut: 10
## Hifi-GAN configs
# Settings for the Hifi-GAN generator. Every generator option must stay nested
# under generator_configs: at column 0 the options parse as unrelated
# top-level keys, generator_configs itself becomes null, and spk_emb_size
# collides with the top-level key of the same name in the speaker-encoder
# section.
generator_configs:
  bias: true
  channels: 512
  # NOTE(review): presumably the number of input feature channels; pitch_axis
  # (12) below falls inside this range -- confirm.
  in_channels: 14
  kernel_size: 7
  nonlinear_activation: LeakyReLU
  nonlinear_activation_params:
    negative_slope: 0.1
  out_channels: 1
  # One dilation triple per entry of resblock_kernel_sizes (three of each).
  resblock_dilations:
    - [1, 3, 5]
    - [1, 3, 5]
    - [1, 3, 5]
  resblock_kernel_sizes:
    - 3
    - 7
    - 11
  spk_emb_size: 64
  # Each kernel size is twice the matching entry of upsample_scales.
  upsample_kernel_sizes:
    - 16
    - 10
    - 8
    - 4
  # Product is 8 * 5 * 4 * 2 = 320, i.e. sr / ft_sr (16000 / 50) in this file.
  upsample_scales:
    - 8
    - 5
    - 4
    - 2
  use_additional_convs: true
  use_weight_norm: true
# NOTE(review): the pitch_* keys are kept at top level because they break the
# alphabetical order of the generator_configs keys; confirm against the loader
# that they are not generator arguments.
pitch_offset: 50
pitch_rescale: 0.01
pitch_axis: 12
## Speaker encoder configs
# Same value (64) as the spk_emb_size entry listed with the Hifi-GAN generator
# settings in this file; presumably the two must agree -- TODO confirm.
spk_emb_size: 64
# Presumably the encoder layer used for speaker features -- confirm indexing.
spk_target_layer: 0
## Checkpoint Info
# Every checkpoint path is explicitly null in this file; presumably the loader
# fills them in or substitutes defaults at run time -- TODO confirm.
all_ckpt: null
linear_model_path: null
generator_ckpt: null
spk_ft_ckpt: null