# Codec model configuration: encoder, decoder, and the two discriminator
# sections (mpd, mstft).
#
# NOTE(review): the nesting below was reconstructed from key order because
# the source text carried no indentation (and had stray "|" characters that
# YAML reads as literal-block-scalar indicators). Confirm the parent of each
# key against the code that consumes this config.

codec_encoder:
  ngf: 48
  hidden_dim: 1024
  # NOTE(review): 1 here versus 12 in codec_decoder — confirm intentional.
  depth: 1
  heads: 16
  pos_meb_dim: 64
  # 3 * 2 * 4 * 5 * 8 = 960, the same value as hop_length further down.
  up_ratios: [3, 2, 4, 5, 8]
  dilations: [1, 3, 9]

codec_decoder:
  ngf: 48
  # Same ratios as codec_encoder.up_ratios, in reverse order.
  up_ratios: [8, 5, 4, 2, 3]
  dilations: [1, 3, 9]
  hidden_dim: 1024
  depth: 12
  heads: 16
  pos_meb_dim: 64
  # NOTE(review): hop_length through codebook_dim are assumed to be
  # codec_decoder settings because no other section header precedes them;
  # verify they are not meant to be top-level keys.
  hop_length: 960
  vq_num_quantizers: 9
  vq_dim: 1024
  vq_commit_weight: 0.25
  vq_weight_init: false
  vq_full_commit_loss: false
  codebook_size: 1024
  codebook_dim: 64

mpd:
  periods: [2, 3, 5, 7, 11]
  max_downsample_channels: 512
  channels: 16
  channel_increasing_factor: 4

mstft:
  # One entry per resolution (eight in total). Each hop size is half of the
  # matching FFT size, and each window length equals the FFT size.
  stft_params:
    fft_sizes: [78, 126, 206, 334, 542, 876, 1418, 2296]
    hop_sizes: [39, 63, 103, 167, 271, 438, 709, 1148]
    win_lengths: [78, 126, 206, 334, 542, 876, 1418, 2296]
    window: hann_window
  # NOTE(review): the keys below are assumed to sit directly under mstft
  # rather than inside stft_params — confirm against the consumer.
  in_channels: 1
  out_channels: 1
  kernel_sizes: [5, 3]
  channels: 32
  max_downsample_channels: 512
  downsample_scales: [2, 2, 2]
  use_weight_norm: true