File size: 1,112 Bytes
2e6a07d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
## Base audio configs
# Waveform normalization switch plus the two rates used by the rest of
# this file.
normalize: true # zscore input waveforms
# Audio sample rate; presumably in Hz (16 kHz) - TODO confirm with the loader.
sr: 16000
# Feature rate; presumably frames per second - TODO confirm.
# sr / ft_sr = 16000 / 50 = 320, which equals the product of
# generator_configs.upsample_scales (8 * 5 * 4 * 2).
ft_sr: 50
## Source feature configs
# Pitch, periodicity and loudness settings. The key names suggest a
# CREPE- or PENN-based pitch tracker; confirm against the consuming code.
# presumably the pitch-tracker model variant - TODO confirm allowed values
crepe_model: full
# NOTE(review): hard-coded to cuda; presumably must be overridden on
# CPU-only hosts - confirm the loader allows that.
device: cuda
# Upper / lower pitch bounds; presumably in Hz - TODO confirm.
# fmin has the same value as generator_configs.pitch_offset (50).
fmax: 550
fmin: 50
pitch_q: 2
# 0.0 presumably disables periodicity-based gating - TODO confirm.
periodicity_threshold: 0.0
reflect_loudness: false
loudness_threshold: 0.05
use_penn: false
## Articulatory Inversion configs
# Identifier of the pretrained speech model; the "org/name" form looks
# like a Hugging Face Hub model id - confirm how the loader resolves it.
speech_model: microsoft/wavlm-large
# presumably the width of the speech-model features passed to the speaker
# encoder (1024 would match WavLM Large's hidden size) - TODO confirm
spk_ft_size: 1024
# presumably the speech-model layer whose output is used for inversion;
# indexing base (0 or 1) is not visible here - TODO confirm
target_layer: 9
# presumably a low-pass cutoff for the inverted features; unit not
# visible here - TODO confirm
freqcut: 10
## Hifi-GAN configs
# Settings for the HiFi-GAN generator. Short integer lists are written in
# flow style; the parsed values are the same as the block-style form.
generator_configs:
  bias: true
  channels: 512
  in_channels: 14
  kernel_size: 7
  nonlinear_activation: LeakyReLU
  nonlinear_activation_params:
    negative_slope: 0.1
  out_channels: 1
  # Three dilation lists, matching the three resblock_kernel_sizes entries.
  resblock_dilations:
    - [1, 3, 5]
    - [1, 3, 5]
    - [1, 3, 5]
  resblock_kernel_sizes: [3, 7, 11]
  # Same value as the top-level spk_emb_size (64); presumably the two must
  # agree - TODO confirm.
  spk_emb_size: 64
  # Each kernel size here is twice the upsample scale at the same position.
  upsample_kernel_sizes: [16, 10, 8, 4]
  # 8 * 5 * 4 * 2 = 320 = sr / ft_sr (16000 / 50).
  upsample_scales: [8, 5, 4, 2]
  use_additional_convs: true
  use_weight_norm: true
  # NOTE(review): pitch_offset equals fmin (50). Presumably the pitch input
  # is shifted by pitch_offset and scaled by pitch_rescale on the channel
  # selected by pitch_axis - confirm against the generator code.
  pitch_offset: 50
  pitch_rescale: 0.01
  pitch_axis: 12
## Speaker encoder configs
# Same value as generator_configs.spk_emb_size (64); presumably the two
# must stay in sync - TODO confirm.
spk_emb_size: 64
# presumably the speech-model layer used for speaker features (0 may mean
# the first layer or the input embeddings) - TODO confirm
spk_target_layer: 0
## Checkpoint Info
# Every checkpoint entry is null in this file; presumably the paths are
# supplied by an override or resolved by the loader - TODO confirm.
all_ckpt: null
linear_model_path: null
generator_ckpt: null
spk_ft_ckpt: null