Vocos-Encodec / config.json
Manel's picture
Upload model
50c21dd verified
{
"adanorm_num_embeddings": 4,
"architectures": [
"VocosWithEncodecModel"
],
"bandwidths": [
1.5,
3.0,
6.0,
12.0
],
"encodec_config": {
"audio_channels": 1,
"chunk_length_s": null,
"codebook_dim": 128,
"codebook_size": 1024,
"compress": 2,
"dilation_growth_rate": 2,
"hidden_size": 128,
"kernel_size": 7,
"last_kernel_size": 7,
"model_type": "encodec",
"norm_type": "weight_norm",
"normalize": false,
"num_filters": 32,
"num_lstm_layers": 2,
"num_residual_layers": 1,
"overlap": null,
"pad_mode": "reflect",
"residual_kernel_size": 3,
"sampling_rate": 24000,
"target_bandwidths": [
1.5,
3.0,
6.0,
12.0,
24.0
],
"trim_right_ratio": 1.0,
"upsampling_ratios": [
8,
5,
4,
2
],
"use_causal_conv": true,
"use_conv_shortcut": true
},
"hidden_dim": 384,
"hop_length": 320,
"input_channels": 128,
"intermediate_dim": 1152,
"kernel_size": 7,
"layer_norm_eps": 1e-06,
"layer_scale_init_value": 0.125,
"model_type": "vocos_with_encodec",
"n_fft": 1280,
"num_layers": 8,
"padding": 3,
"spec_padding": "same",
"torch_dtype": "float32",
"train_codebooks": false,
"transformers_version": "4.55.2",
"use_adaptive_norm": true
}