Upload 4 files

Files changed (4) hide show

feature_extraction.yaml ADDED Viewed

+##### Configuration for extracting features for training
+##### Generator and is skipped.
+## Base audio configs
+normalize: true # zscore input waveforms
+sr: 16000
+ft_sr: 50
+## Source feature configs
+crepe_model: full
+device: cuda
+fmax: 550
+fmin: 50
+pitch_q: 4
+periodicity_threshold: 0.0
+reflect_loudness: false
+loudness_threshold: 0.05 #1
+use_penn: false
+## Articulatory Inversion configs
+speech_model: microsoft/wavlm-large
+spk_ft_size: 1024
+target_layer: 9
+freqcut: 10
+## Hifi-GAN configs
+generator_configs: null
+## Checkpoint Info
+all_ckpt: null
+linear_model_path: null
+generator_ckpt: null
+spk_ft_ckpt: null

model_english_1500k.yaml ADDED Viewed

+## Base audio configs
+normalize: true # zscore input waveforms
+sr: 16000
+ft_sr: 50
+## Source feature configs
+crepe_model: full
+device: cuda
+fmax: 550
+fmin: 50
+pitch_q: 4
+periodicity_threshold: 0.0
+reflect_loudness: false
+loudness_threshold: 0.05 #1
+use_penn: false
+## Articulatory Inversion configs
+speech_model: microsoft/wavlm-large
+spk_ft_size: 1024
+target_layer: 9
+freqcut: 10
+## Hifi-GAN configs
+generator_configs:
+  bias: true
+  channels: 512
+  in_channels: 14
+  kernel_size: 7
+  nonlinear_activation: LeakyReLU
+  nonlinear_activation_params:
+    negative_slope: 0.1
+  out_channels: 1
+  resblock_dilations:
+  - - 1
+    - 3
+    - 5
+  - - 1
+    - 3
+    - 5
+  - - 1
+    - 3
+    - 5
+  resblock_kernel_sizes:
+  - 3
+  - 7
+  - 11
+  spk_emb_size: 64
+  upsample_kernel_sizes:
+  - 16
+  - 10
+  - 8
+  - 4
+  upsample_scales:
+  - 8
+  - 5
+  - 4
+  - 2
+  use_additional_convs: true
+  use_weight_norm: true
+  pitch_offset: 50
+  pitch_rescale: 0.01
+  pitch_axis: 12
+## Speaker encoder configs
+spk_emb_size: 64
+spk_target_layer: 0
+## Checkpoint Info
+all_ckpt: null
+linear_model_path: null
+generator_ckpt: null
+spk_ft_ckpt: null

model_englishplus_2M.yaml ADDED Viewed

+## Base audio configs
+normalize: true # zscore input waveforms
+sr: 16000
+ft_sr: 50
+## Source feature configs
+crepe_model: full
+device: cuda
+fmax: 550
+fmin: 50
+pitch_q: 2
+periodicity_threshold: 0.4
+reflect_loudness: true
+loudness_threshold: 0.05 #1
+use_penn: true
+## Articulatory Inversion configs
+speech_model: microsoft/wavlm-large
+spk_ft_size: 1024
+target_layer: 9
+freqcut: 10
+## Hifi-GAN configs
+generator_configs:
+  bias: true
+  channels: 512
+  in_channels: 14
+  kernel_size: 7
+  nonlinear_activation: LeakyReLU
+  nonlinear_activation_params:
+    negative_slope: 0.1
+  out_channels: 1
+  resblock_dilations:
+  - - 1
+    - 3
+    - 5
+  - - 1
+    - 3
+    - 5
+  - - 1
+    - 3
+    - 5
+  resblock_kernel_sizes:
+  - 3
+  - 7
+  - 11
+  spk_emb_size: 64
+  upsample_kernel_sizes:
+  - 16
+  - 10
+  - 8
+  - 4
+  upsample_scales:
+  - 8
+  - 5
+  - 4
+  - 2
+  use_additional_convs: true
+  use_weight_norm: true
+  pitch_offset: 50
+  pitch_rescale: 0.01
+  pitch_axis: 12
+## Speaker encoder configs
+spk_emb_size: 64
+spk_target_layer: 6
+## Checkpoint Info
+all_ckpt: null
+linear_model_path: null
+generator_ckpt: null
+spk_ft_ckpt: null

model_multiling.yaml ADDED Viewed

+## Base audio configs
+normalize: true # zscore input waveforms
+sr: 16000
+ft_sr: 50
+## Source feature configs
+crepe_model: full
+device: cuda
+fmax: 550
+fmin: 50
+pitch_q: 2
+periodicity_threshold: 0.0
+reflect_loudness: false
+loudness_threshold: 0.05
+use_penn: false
+## Articulatory Inversion configs
+speech_model: microsoft/wavlm-large
+spk_ft_size: 1024
+target_layer: 9
+freqcut: 10
+## Hifi-GAN configs
+generator_configs:
+  bias: true
+  channels: 512
+  in_channels: 14
+  kernel_size: 7
+  nonlinear_activation: LeakyReLU
+  nonlinear_activation_params:
+    negative_slope: 0.1
+  out_channels: 1
+  resblock_dilations:
+  - - 1
+    - 3
+    - 5
+  - - 1
+    - 3
+    - 5
+  - - 1
+    - 3
+    - 5
+  resblock_kernel_sizes:
+  - 3
+  - 7
+  - 11
+  spk_emb_size: 64
+  upsample_kernel_sizes:
+  - 16
+  - 10
+  - 8
+  - 4
+  upsample_scales:
+  - 8
+  - 5
+  - 4
+  - 2
+  use_additional_convs: true
+  use_weight_norm: true
+  pitch_offset: 50
+  pitch_rescale: 0.01
+  pitch_axis: 12
+## Speaker encoder configs
+spk_emb_size: 64
+spk_target_layer: 0
+## Checkpoint Info
+all_ckpt: null
+linear_model_path: null
+generator_ckpt: null
+spk_ft_ckpt: null