{ "ch": 128, "in_channels": 2, "ch_mult": [ 1, 2, 4 ], "num_res_blocks": 2, "attn_resolutions": [], "resolution": 256, "z_channels": 8, "double_z": true, "n_fft": 1024, "norm_type": "pixel", "causality_axis": "height", "dropout": 0.0, "mid_block_add_attention": false, "sample_rate": 16000, "mel_hop_length": 160, "is_causal": true, "mel_bins": 64, "resamp_with_conv": true, "attn_type": "vanilla" }