Safetensors
tts
vc
svs
svc
music
Vevo2 / vocoder /config.json
RMSnow's picture
Upload folder using huggingface_hub
795b27d verified
{
"model_type": "Vocoder",
"preprocess": {
"hop_size": 480,
"sample_rate": 24000,
"max_length": 36000,
"n_fft": 1920,
"num_mels": 128,
"win_size": 1920,
"fmin": 0,
"fmax": 12000,
"mel_var": 8.14,
"mel_mean": -4.92,
"load_phone": false,
"load_chromagram": false
},
"model": {
"vocos": {
"input_channels": 128,
"dim": 1024,
"intermediate_dim": 4096,
"num_layers": 30,
"n_fft": 1920,
"hop_size": 480,
"padding": "same"
},
"period_gan": {
"max_downsample_channels": 1024,
"channels": 64,
"channel_increasing_factor": 2
},
"spec_gan": {
"stft_params": {
"fft_sizes": [
128,
256,
512,
1024,
2048
],
"hop_sizes": [
32,
64,
128,
256,
512
],
"win_lengths": [
128,
256,
512,
1024,
2048
],
"window": "hann_window"
},
"in_channels": 1,
"out_channels": 1,
"channels": 64,
"kernel_sizes": [
5,
3
],
"max_downsample_channels": 1024,
"down_scales": [
2,
2,
2
],
"use_weight_norm": true,
"use_complex": false
}
},
"loss": {
"mel_loss": {
"sample_rate": 24000
},
"disc_loss_weight": 1.0,
"mel_loss_weight": 10.0,
"adv_loss_weight": 2.0,
"fm_loss_weight": 2.0,
"spec_fm_loss_weight": 1.0
}
}