{
"adanorm_num_embeddings": 4,
"architectures": [
"VocosWithEncodecModel"
],
"bandwidths": [
1.5,
3.0,
6.0,
12.0
],
"encodec_config": {
"audio_channels": 1,
"chunk_length_s": null,
"codebook_dim": 128,
"codebook_size": 1024,
"compress": 2,
"dilation_growth_rate": 2,
"hidden_size": 128,
"kernel_size": 7,
"last_kernel_size": 7,
"model_type": "encodec",
"norm_type": "weight_norm",
"normalize": false,
"num_filters": 32,
"num_lstm_layers": 2,
"num_residual_layers": 1,
"overlap": null,
"pad_mode": "reflect",
"residual_kernel_size": 3,
"sampling_rate": 24000,
"target_bandwidths": [
1.5,
3.0,
6.0,
12.0,
24.0
],
"trim_right_ratio": 1.0,
"upsampling_ratios": [
8,
5,
4,
2
],
"use_causal_conv": true,
"use_conv_shortcut": true
},
"hidden_dim": 384,
"hop_length": 320,
"input_channels": 128,
"intermediate_dim": 1152,
"kernel_size": 7,
"layer_norm_eps": 1e-06,
"layer_scale_init_value": 0.125,
"model_type": "vocos_with_encodec",
"n_fft": 1280,
"num_layers": 8,
"padding": 3,
"spec_padding": "same",
"torch_dtype": "float32",
"train_codebooks": false,
"transformers_version": "4.55.2",
"use_adaptive_norm": true
}