{ "_target_": "modules.v2.vc_wrapper.VoiceConversionWrapper", "sr": 22050, "hop_size": 256, "mel_fn": { "_target_": "modules.audio.mel_spectrogram", "_partial_": true, "n_fft": 1024, "win_size": 1024, "hop_size": 256, "num_mels": 80, "sampling_rate": 22050, "fmin": 0, "fmax": null, "center": false }, "cfm": { "_target_": "modules.v2.cfm.CFM", "estimator": { "_target_": "modules.v2.dit_wrapper.DiT", "time_as_token": true, "style_as_token": true, "uvit_skip_connection": false, "block_size": 8192, "depth": 13, "num_heads": 8, "hidden_dim": 512, "in_channels": 80, "content_dim": 512, "style_encoder_dim": 192, "class_dropout_prob": 0.1, "dropout_rate": 0.0, "attn_dropout_rate": 0.0 } }, "cfm_length_regulator": { "_target_": "modules.v2.length_regulator.InterpolateRegulator", "channels": 512, "is_discrete": true, "codebook_size": 2048, "sampling_ratios": [ 1, 1, 1, 1 ], "f0_condition": false }, "ar": { "_target_": "modules.v2.ar.NaiveWrapper", "model": { "_target_": "modules.v2.ar.NaiveTransformer", "config": { "_target_": "modules.v2.ar.NaiveModelArgs", "dropout": 0.0, "rope_base": 10000.0, "dim": 768, "head_dim": 64, "n_local_heads": 2, "intermediate_size": 2304, "n_head": 12, "n_layer": 12, "vocab_size": 2049 } } }, "ar_length_regulator": { "_target_": "modules.v2.length_regulator.InterpolateRegulator", "channels": 768, "is_discrete": true, "codebook_size": 32, "sampling_ratios": [], "f0_condition": false }, "style_encoder": { "_target_": "modules.campplus.DTDNN.CAMPPlus", "feat_dim": 80, "embedding_size": 192 }, "content_extractor_narrow": { "_target_": "modules.astral_quantization.default_model.AstralQuantizer", "tokenizer_name": "openai/whisper-small", "ssl_model_name": "facebook/hubert-large-ll60k", "ssl_output_layer": 18, "skip_ssl": true, "encoder": { "_target_": "modules.astral_quantization.convnext.ConvNeXtV2Stage", "dim": 512, "num_blocks": 12, "intermediate_dim": 1536, "dilation": 1, "input_dim": 1024 }, "quantizer": { "_target_": "modules.astral_quantization.bsq.BinarySphericalQuantize", "codebook_size": 32, "dim": 512, "entropy_loss_weight": 0.1, "diversity_gamma": 1.0, "spherical": true, "enable_entropy_loss": true, "soft_entropy_loss": true } }, "content_extractor_wide": { "_target_": "modules.astral_quantization.default_model.AstralQuantizer", "tokenizer_name": "openai/whisper-small", "ssl_model_name": "facebook/hubert-large-ll60k", "ssl_output_layer": 18, "encoder": { "_target_": "modules.astral_quantization.convnext.ConvNeXtV2Stage", "dim": 512, "num_blocks": 12, "intermediate_dim": 1536, "dilation": 1, "input_dim": 1024 }, "quantizer": { "_target_": "modules.astral_quantization.bsq.BinarySphericalQuantize", "codebook_size": 2048, "dim": 512, "entropy_loss_weight": 0.1, "diversity_gamma": 1.0, "spherical": true, "enable_entropy_loss": true, "soft_entropy_loss": true } }, "vocoder": { "_target_": "modules.bigvgan.bigvgan.BigVGAN.from_pretrained", "pretrained_model_name_or_path": "nvidia/bigvgan_v2_22khz_80band_256x", "use_cuda_kernel": false } }