---
# Model/pipeline configuration. Sections below: base audio settings, source
# (pitch/loudness) feature settings, articulatory inversion settings,
# HiFi-GAN generator settings, speaker-encoder settings, and checkpoint paths.
#
# NOTE(review): this file was reconstructed from a copy whose line breaks and
# indentation had been lost (everything sat on a single line behind a leading
# '#', i.e. it parsed as one comment and defined no keys). Key order and values
# are unchanged; nesting was inferred from key order and section headers.
# Confirm the nesting against the code that consumes this file.

## Base audio configs
normalize: true  # zscore input waveforms
sr: 16000  # presumably the audio sample rate in Hz -- TODO confirm
ft_sr: 50  # presumably the feature frame rate in Hz -- TODO confirm

## Source feature configs
crepe_model: full
device: cuda
fmax: 550
fmin: 50
pitch_q: 2
periodicity_threshold: 0.0
reflect_loudness: false
loudness_threshold: 0.05
use_penn: false

## Articulatory Inversion configs
speech_model: microsoft/wavlm-large
spk_ft_size: 1024
target_layer: 9
freqcut: 10

## Hifi-GAN configs
generator_configs:
  bias: true
  channels: 512
  in_channels: 14
  kernel_size: 7
  nonlinear_activation: LeakyReLU
  nonlinear_activation_params:
    negative_slope: 0.1
  out_channels: 1
  # One dilation list per entry of resblock_kernel_sizes (three of each here).
  resblock_dilations:
    - [1, 3, 5]
    - [1, 3, 5]
    - [1, 3, 5]
  resblock_kernel_sizes: [3, 7, 11]
  # NOTE(review): spk_emb_size is also set at top level under the speaker
  # encoder section; both are 64 in this file. Presumably they must match --
  # confirm with the consumer before changing either one.
  spk_emb_size: 64
  # upsample_kernel_sizes and upsample_scales are parallel lists (four entries
  # each in this file).
  upsample_kernel_sizes: [16, 10, 8, 4]
  upsample_scales: [8, 5, 4, 2]
  use_additional_convs: true
  use_weight_norm: true
  # NOTE(review): the three pitch_* keys follow use_weight_norm inside the
  # Hifi-GAN section, so they are kept under generator_configs here. If the
  # consumer reads them from the top level instead, dedent them.
  pitch_offset: 50
  pitch_rescale: 0.01
  pitch_axis: 12

## Speaker encoder configs
spk_emb_size: 64
spk_target_layer: 0

## Checkpoint Info
# All checkpoint paths are unset (null) in this file.
all_ckpt: null
linear_model_path: null
generator_ckpt: null
spk_ft_ckpt: null