{
  "hift": {
    "in_channels": 80,
    "base_channels": 512,
    "nb_harmonics": 8,
    "sampling_rate": 22050,
    "nsf_alpha": 0.1,
    "nsf_sigma": 0.003,
    "nsf_voiced_threshold": 10,
    "upsample_rates": [
      8,
      8
    ],
    "upsample_kernel_sizes": [
      16,
      16
    ],
    "istft_params": {
      "n_fft": 16,
      "hop_len": 4
    },
    "resblock_kernel_sizes": [
      3,
      7,
      11
    ],
    "resblock_dilation_sizes": [
      [
        1,
        3,
        5
      ],
      [
        1,
        3,
        5
      ],
      [
        1,
        3,
        5
      ]
    ],
    "source_resblock_kernel_sizes": [
      7,
      11
    ],
    "source_resblock_dilation_sizes": [
      [
        1,
        3,
        5
      ],
      [
        1,
        3,
        5
      ]
    ],
    "lrelu_slope": 0.1,
    "audio_limit": 0.99
  },
  "f0_predictor": {
    "num_class": 1,
    "in_channels": 80,
    "cond_channels": 512
  },
  "pretrained_model_path": "hift.pt"
}