File size: 3,637 Bytes

0d9b403

{
  "_target_": "modules.v2.vc_wrapper.VoiceConversionWrapper",
  "sr": 22050,
  "hop_size": 256,
  "mel_fn": {
    "_target_": "modules.audio.mel_spectrogram",
    "_partial_": true,
    "n_fft": 1024,
    "win_size": 1024,
    "hop_size": 256,
    "num_mels": 80,
    "sampling_rate": 22050,
    "fmin": 0,
    "fmax": null,
    "center": false
  },
  "cfm": {
    "_target_": "modules.v2.cfm.CFM",
    "estimator": {
      "_target_": "modules.v2.dit_wrapper.DiT",
      "time_as_token": true,
      "style_as_token": true,
      "uvit_skip_connection": false,
      "block_size": 8192,
      "depth": 13,
      "num_heads": 8,
      "hidden_dim": 512,
      "in_channels": 80,
      "content_dim": 512,
      "style_encoder_dim": 192,
      "class_dropout_prob": 0.1,
      "dropout_rate": 0.0,
      "attn_dropout_rate": 0.0
    }
  },
  "cfm_length_regulator": {
    "_target_": "modules.v2.length_regulator.InterpolateRegulator",
    "channels": 512,
    "is_discrete": true,
    "codebook_size": 2048,
    "sampling_ratios": [
      1,
      1,
      1,
      1
    ],
    "f0_condition": false
  },
  "ar": {
    "_target_": "modules.v2.ar.NaiveWrapper",
    "model": {
      "_target_": "modules.v2.ar.NaiveTransformer",
      "config": {
        "_target_": "modules.v2.ar.NaiveModelArgs",
        "dropout": 0.0,
        "rope_base": 10000.0,
        "dim": 768,
        "head_dim": 64,
        "n_local_heads": 2,
        "intermediate_size": 2304,
        "n_head": 12,
        "n_layer": 12,
        "vocab_size": 2049
      }
    }
  },
  "ar_length_regulator": {
    "_target_": "modules.v2.length_regulator.InterpolateRegulator",
    "channels": 768,
    "is_discrete": true,
    "codebook_size": 32,
    "sampling_ratios": [],
    "f0_condition": false
  },
  "style_encoder": {
    "_target_": "modules.campplus.DTDNN.CAMPPlus",
    "feat_dim": 80,
    "embedding_size": 192
  },
  "content_extractor_narrow": {
    "_target_": "modules.astral_quantization.default_model.AstralQuantizer",
    "tokenizer_name": "openai/whisper-small",
    "ssl_model_name": "facebook/hubert-large-ll60k",
    "ssl_output_layer": 18,
    "skip_ssl": true,
    "encoder": {
      "_target_": "modules.astral_quantization.convnext.ConvNeXtV2Stage",
      "dim": 512,
      "num_blocks": 12,
      "intermediate_dim": 1536,
      "dilation": 1,
      "input_dim": 1024
    },
    "quantizer": {
      "_target_": "modules.astral_quantization.bsq.BinarySphericalQuantize",
      "codebook_size": 32,
      "dim": 512,
      "entropy_loss_weight": 0.1,
      "diversity_gamma": 1.0,
      "spherical": true,
      "enable_entropy_loss": true,
      "soft_entropy_loss": true
    }
  },
  "content_extractor_wide": {
    "_target_": "modules.astral_quantization.default_model.AstralQuantizer",
    "tokenizer_name": "openai/whisper-small",
    "ssl_model_name": "facebook/hubert-large-ll60k",
    "ssl_output_layer": 18,
    "encoder": {
      "_target_": "modules.astral_quantization.convnext.ConvNeXtV2Stage",
      "dim": 512,
      "num_blocks": 12,
      "intermediate_dim": 1536,
      "dilation": 1,
      "input_dim": 1024
    },
    "quantizer": {
      "_target_": "modules.astral_quantization.bsq.BinarySphericalQuantize",
      "codebook_size": 2048,
      "dim": 512,
      "entropy_loss_weight": 0.1,
      "diversity_gamma": 1.0,
      "spherical": true,
      "enable_entropy_loss": true,
      "soft_entropy_loss": true
    }
  },
  "vocoder": {
    "_target_": "modules.bigvgan.bigvgan.BigVGAN.from_pretrained",
    "pretrained_model_name_or_path": "nvidia/bigvgan_v2_22khz_80band_256x",
    "use_cuda_kernel": false
  }
}