# Audio codec hyperparameters, grouped by component.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose line breaks and indentation had been lost. Key order and values are
# unchanged; confirm section membership (especially the keys that follow
# `stft_params` under `mstft`) against the code that consumes this config.

codec_encoder:
  ngf: 48
  # 3 * 2 * 4 * 5 * 8 = 960, the same value as codec_decoder.hop_length.
  up_ratios: [3, 2, 4, 5, 8]
  dilations: [1, 3, 9]

codec_decoder:
  ngf: 48
  up_ratios: [8, 5, 4, 2, 3]  # Reverse of encoder
  dilations: [1, 3, 9]
  hidden_dim: 1024
  depth: 12
  heads: 16
  pos_meb_dim: 64
  hop_length: 960
  # Quantizer settings (the `vq_` / `codebook_` keys).
  vq_num_quantizers: 9
  vq_dim: 1024
  vq_commit_weight: 0.25
  vq_weight_init: false
  vq_full_commit_loss: false
  codebook_size: 1024
  codebook_dim: 64

# Presumably the multi-period discriminator settings -- TODO confirm.
mpd:
  periods: [2, 3, 5, 7, 11]
  max_downsample_channels: 512
  channels: 16
  channel_increasing_factor: 4

# Presumably the multi-resolution STFT discriminator settings -- TODO confirm.
mstft:
  stft_params:
    # One entry per STFT resolution; the three lists have the same length (8).
    # Each hop size is half the matching FFT size, and each window length
    # equals the matching FFT size.
    fft_sizes: [78, 126, 206, 334, 542, 876, 1418, 2296]
    hop_sizes: [39, 63, 103, 167, 271, 438, 709, 1148]
    win_lengths: [78, 126, 206, 334, 542, 876, 1418, 2296]
    window: hann_window
  # NOTE(review): assumed to be siblings of `stft_params`, not members of it;
  # verify against the consumer.
  in_channels: 1
  out_channels: 1
  kernel_sizes: [5, 3]
  channels: 32
  max_downsample_channels: 512
  downsample_scales: [2, 2, 2]
  use_weight_norm: true