{
  "feature_extractor": {
    "class_path": "vocos.feature_extractors.EncodecFeatures",
    "init_args": {
      "encodec_model": "encodec_24khz",
      "bandwidths": [
        1.5,
        3.0,
        6.0,
        12.0
      ],
      "train_codebooks": false
    }
  },
  "backbone": {
    "class_path": "vocos.models.VocosBackbone",
    "init_args": {
      "input_channels": 128,
      "dim": 384,
      "intermediate_dim": 1152,
      "num_layers": 8,
      "adanorm_num_embeddings": 4
    }
  },
  "head": {
    "class_path": "vocos.heads.ISTFTHead",
    "init_args": {
      "dim": 384,
      "n_fft": 1280,
      "hop_length": 320,
      "padding": "same"
    }
  }
}