{ "hift": { "in_channels": 80, "base_channels": 512, "nb_harmonics": 8, "sampling_rate": 22050, "nsf_alpha": 0.1, "nsf_sigma": 0.003, "nsf_voiced_threshold": 10, "upsample_rates": [ 8, 8 ], "upsample_kernel_sizes": [ 16, 16 ], "istft_params": { "n_fft": 16, "hop_len": 4 }, "resblock_kernel_sizes": [ 3, 7, 11 ], "resblock_dilation_sizes": [ [ 1, 3, 5 ], [ 1, 3, 5 ], [ 1, 3, 5 ] ], "source_resblock_kernel_sizes": [ 7, 11 ], "source_resblock_dilation_sizes": [ [ 1, 3, 5 ], [ 1, 3, 5 ] ], "lrelu_slope": 0.1, "audio_limit": 0.99 }, "f0_predictor": { "num_class": 1, "in_channels": 80, "cond_channels": 512 }, "pretrained_model_path": "hift.pt" }