# NOTE(review): viewer residue converted to a comment — file size 959 bytes,
# revision id 59b7eeb (presumably a commit hash); line-number gutter removed.
# Encoder hyperparameters. Every key is nested under `codec_encoder`; several
# names (ngf, hidden_dim, depth, heads, pos_meb_dim, up_ratios, dilations) are
# reused by the decoder section, so they must not sit at the top level.
codec_encoder:
  ngf: 48
  hidden_dim: 1024
  # NOTE(review): depth is 1 here while the decoder section uses 12 — confirm
  # this asymmetry is intentional.
  depth: 1
  heads: 16
  # Key spelling ("meb") kept verbatim; presumably the consumer reads this
  # exact name — verify before renaming.
  pos_meb_dim: 64
  # The ratios multiply to 3 * 2 * 4 * 5 * 8 = 960, the same value this file
  # assigns to hop_length.
  up_ratios: [3, 2, 4, 5, 8]
  dilations: [1, 3, 9]
# Decoder hyperparameters, including the hop_length, vq_* and codebook_*
# settings.
# NOTE(review): the original indentation was lost; placing hop_length, vq_*
# and codebook_* under `codec_decoder` is reconstructed from key order —
# confirm against the code that loads this file.
codec_decoder:
  ngf: 48
  up_ratios: [8, 5, 4, 2, 3]  # Reverse of encoder
  dilations: [1, 3, 9]
  hidden_dim: 1024
  depth: 12
  heads: 16
  # Key spelling ("meb") kept verbatim; presumably the consumer reads this
  # exact name — verify before renaming.
  pos_meb_dim: 64
  # Equals the product of the up_ratios entries (8 * 5 * 4 * 2 * 3 = 960).
  hop_length: 960
  vq_num_quantizers: 9
  vq_dim: 1024
  vq_commit_weight: 0.25
  # Booleans written in canonical lowercase form.
  vq_weight_init: false
  vq_full_commit_loss: false
  codebook_size: 1024
  codebook_dim: 64
# Settings for the `mpd` component (presumably a multi-period discriminator —
# confirm against the consumer). Keys are nested here because `channels` and
# `max_downsample_channels` are also used by the `mstft` section.
mpd:
  # The first five prime numbers.
  periods: [2, 3, 5, 7, 11]
  max_downsample_channels: 512
  channels: 16
  channel_increasing_factor: 4
# Settings for the `mstft` component (presumably a multi-resolution STFT
# discriminator — confirm against the consumer).
# NOTE(review): the original indentation was lost. The four STFT keys are
# placed under `stft_params` and the remaining keys directly under `mstft`;
# this boundary is inferred from key names — verify against the loader.
mstft:
  stft_params:
    # Eight resolutions; the three lists are index-aligned.
    fft_sizes: [78, 126, 206, 334, 542, 876, 1418, 2296]
    # Each hop size is exactly half of the fft size at the same index.
    hop_sizes: [39, 63, 103, 167, 271, 438, 709, 1148]
    # Identical to fft_sizes.
    win_lengths: [78, 126, 206, 334, 542, 876, 1418, 2296]
    window: hann_window
  in_channels: 1
  out_channels: 1
  kernel_sizes: [5, 3]
  channels: 32
  max_downsample_channels: 512
  downsample_scales: [2, 2, 2]
  # Canonical lowercase boolean; the stray trailing " |" was removed because it
  # turned the value into a string.
  use_weight_norm: true