File size: 959 Bytes
59b7eeb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Encoder settings.
# NOTE(review): how each key is consumed is not visible in this file; confirm
# against the model code before changing values.
codec_encoder:
  ngf: 48
  hidden_dim: 1024
  # NOTE(review): depth is 1 here but 12 under codec_decoder; confirm the
  # asymmetry is intentional.
  depth: 1
  heads: 16
  # The key is spelled "pos_meb_dim" under codec_decoder as well; presumably it
  # matches the consumer's parameter name, so do not rename without checking.
  pos_meb_dim: 64
  # Product is 3 * 2 * 4 * 5 * 8 = 960, equal to codec_decoder.hop_length.
  up_ratios: [3, 2, 4, 5, 8]
  dilations: [1, 3, 9]
# Decoder and vector-quantizer settings.
# NOTE(review): how each key is consumed is not visible in this file; confirm
# against the model code before changing values.
codec_decoder:
  ngf: 48
  # Product is 8 * 5 * 4 * 2 * 3 = 960, equal to hop_length below.
  up_ratios: [8, 5, 4, 2, 3]  # Reverse of encoder
  dilations: [1, 3, 9]
  hidden_dim: 1024
  depth: 12
  heads: 16
  # The key is spelled "pos_meb_dim" under codec_encoder as well; presumably it
  # matches the consumer's parameter name, so do not rename without checking.
  pos_meb_dim: 64
  hop_length: 960
  vq_num_quantizers: 9
  # Same value as hidden_dim above.
  vq_dim: 1024
  vq_commit_weight: 0.25
  # Booleans are written in canonical lowercase form.
  vq_weight_init: false
  vq_full_commit_loss: false
  codebook_size: 1024
  codebook_dim: 64
# NOTE(review): presumably the multi-period discriminator settings; confirm
# against the code that reads this section.
mpd:
  # The first five prime numbers.
  periods: [2, 3, 5, 7, 11]
  max_downsample_channels: 512
  channels: 16
  channel_increasing_factor: 4
# NOTE(review): presumably the multi-resolution STFT discriminator settings;
# confirm against the code that reads this section.
mstft:
  # Eight resolutions; the three lists are index-aligned and of equal length.
  stft_params:
    fft_sizes: [78, 126, 206, 334, 542, 876, 1418, 2296]
    # Each hop size is exactly half of the FFT size at the same index.
    hop_sizes: [39, 63, 103, 167, 271, 438, 709, 1148]
    # Identical to fft_sizes.
    win_lengths: [78, 126, 206, 334, 542, 876, 1418, 2296]
    # NOTE(review): presumably resolved by name to a window function by the
    # consumer; verify the accepted names there.
    window: hann_window
  in_channels: 1
  out_channels: 1
  kernel_sizes: [5, 3]
  channels: 32
  max_downsample_channels: 512
  downsample_scales: [2, 2, 2]
  # Boolean is written in canonical lowercase form.
  use_weight_norm: true