| { |
| "hift": { |
| "in_channels": 80, |
| "base_channels": 512, |
| "nb_harmonics": 8, |
| "sampling_rate": 22050, |
| "nsf_alpha": 0.1, |
| "nsf_sigma": 0.003, |
| "nsf_voiced_threshold": 10, |
| "upsample_rates": [ |
| 8, |
| 8 |
| ], |
| "upsample_kernel_sizes": [ |
| 16, |
| 16 |
| ], |
| "istft_params": { |
| "n_fft": 16, |
| "hop_len": 4 |
| }, |
| "resblock_kernel_sizes": [ |
| 3, |
| 7, |
| 11 |
| ], |
| "resblock_dilation_sizes": [ |
| [ |
| 1, |
| 3, |
| 5 |
| ], |
| [ |
| 1, |
| 3, |
| 5 |
| ], |
| [ |
| 1, |
| 3, |
| 5 |
| ] |
| ], |
| "source_resblock_kernel_sizes": [ |
| 7, |
| 11 |
| ], |
| "source_resblock_dilation_sizes": [ |
| [ |
| 1, |
| 3, |
| 5 |
| ], |
| [ |
| 1, |
| 3, |
| 5 |
| ] |
| ], |
| "lrelu_slope": 0.1, |
| "audio_limit": 0.99 |
| }, |
| "f0_predictor": { |
| "num_class": 1, |
| "in_channels": 80, |
| "cond_channels": 512 |
| }, |
| "pretrained_model_path": "hift.pt" |
| } |
|
|