Upload 2 files
Browse files
config_dit_mel_seed_uvit_whisper_base_f0_44k.yml
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
-
log_dir: "./runs"
|
| 2 |
save_freq: 1
|
| 3 |
log_interval: 10
|
| 4 |
save_interval: 1000
|
| 5 |
device: "cuda"
|
| 6 |
epochs: 1000 # number of epochs for first stage training (pre-training)
|
| 7 |
-
batch_size:
|
| 8 |
batch_length: 100 # maximum duration of audio in a batch (in seconds)
|
| 9 |
max_len: 80 # maximum number of frames
|
| 10 |
pretrained_model: ""
|
|
@@ -25,13 +25,17 @@ model_params:
|
|
| 25 |
dit_type: "DiT" # uDiT or DiT
|
| 26 |
reg_loss_type: "l1" # l1 or l2
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
speech_tokenizer:
|
| 29 |
type: 'whisper'
|
| 30 |
-
|
| 31 |
-
path: "speech_tokenizer_v1.onnx"
|
| 32 |
-
|
| 33 |
-
cosyvoice:
|
| 34 |
-
path: "../CosyVoice/pretrained_models/CosyVoice-300M"
|
| 35 |
|
| 36 |
style_encoder:
|
| 37 |
dim: 192
|
|
|
|
| 1 |
+
log_dir: "./runs/run_dit_mel_seed_uvit_whisper_base_f0_44k"
|
| 2 |
save_freq: 1
|
| 3 |
log_interval: 10
|
| 4 |
save_interval: 1000
|
| 5 |
device: "cuda"
|
| 6 |
epochs: 1000 # number of epochs for first stage training (pre-training)
|
| 7 |
+
batch_size: 1
|
| 8 |
batch_length: 100 # maximum duration of audio in a batch (in seconds)
|
| 9 |
max_len: 80 # maximum number of frames
|
| 10 |
pretrained_model: ""
|
|
|
|
| 25 |
dit_type: "DiT" # uDiT or DiT
|
| 26 |
reg_loss_type: "l1" # l1 or l2
|
| 27 |
|
| 28 |
+
timbre_shifter:
|
| 29 |
+
se_db_path: "./modules/openvoice/checkpoints_v2/converter/se_db.pt"
|
| 30 |
+
ckpt_path: './modules/openvoice/checkpoints_v2/converter'
|
| 31 |
+
|
| 32 |
+
vocoder:
|
| 33 |
+
type: "bigvgan"
|
| 34 |
+
name: "nvidia/bigvgan_v2_44khz_128band_512x"
|
| 35 |
+
|
| 36 |
speech_tokenizer:
|
| 37 |
type: 'whisper'
|
| 38 |
+
name: "openai/whisper-small"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
style_encoder:
|
| 41 |
dim: 192
|
config_dit_mel_seed_uvit_whisper_small_wavenet.yml
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
log_dir: "./runs"
|
| 2 |
save_freq: 1
|
| 3 |
log_interval: 10
|
| 4 |
save_interval: 1000
|
|
@@ -25,24 +25,21 @@ model_params:
|
|
| 25 |
dit_type: "DiT" # uDiT or DiT
|
| 26 |
reg_loss_type: "l1" # l1 or l2
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
speech_tokenizer:
|
| 29 |
type: 'whisper'
|
| 30 |
-
|
| 31 |
-
path: "speech_tokenizer_v1.onnx"
|
| 32 |
-
|
| 33 |
-
cosyvoice:
|
| 34 |
-
path: "../CosyVoice/pretrained_models/CosyVoice-300M"
|
| 35 |
|
| 36 |
style_encoder:
|
| 37 |
dim: 192
|
| 38 |
campplus_path: "campplus_cn_common.bin"
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
decoder_dim: 1536
|
| 44 |
-
decoder_rates: [ 6, 5, 5, 2 ]
|
| 45 |
-
sr: 24000
|
| 46 |
|
| 47 |
length_regulator:
|
| 48 |
channels: 512
|
|
|
|
| 1 |
+
log_dir: "./runs/run_dit_mel_seed_uvit_whisper_small_wavenet"
|
| 2 |
save_freq: 1
|
| 3 |
log_interval: 10
|
| 4 |
save_interval: 1000
|
|
|
|
| 25 |
dit_type: "DiT" # uDiT or DiT
|
| 26 |
reg_loss_type: "l1" # l1 or l2
|
| 27 |
|
| 28 |
+
timbre_shifter:
|
| 29 |
+
se_db_path: "./modules/openvoice/checkpoints_v2/converter/se_db.pt"
|
| 30 |
+
ckpt_path: './modules/openvoice/checkpoints_v2/converter'
|
| 31 |
+
|
| 32 |
speech_tokenizer:
|
| 33 |
type: 'whisper'
|
| 34 |
+
name: "openai/whisper-small"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
style_encoder:
|
| 37 |
dim: 192
|
| 38 |
campplus_path: "campplus_cn_common.bin"
|
| 39 |
|
| 40 |
+
vocoder:
|
| 41 |
+
type: "bigvgan"
|
| 42 |
+
name: "nvidia/bigvgan_v2_22khz_80band_256x"
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
length_regulator:
|
| 45 |
channels: 512
|