Commit ·
128a993
1
Parent(s): d884e8c
update
Browse files- configs/combsub.yaml +6 -6
- configs/diffusion.yaml +8 -8
configs/combsub.yaml
CHANGED
|
@@ -5,12 +5,12 @@ data:
|
|
| 5 |
sampling_rate: 44100
|
| 6 |
block_size: 512 # Equal to hop_length
|
| 7 |
duration: 2 # Audio duration during training, must be less than the duration of the shortest audio clip
|
| 8 |
-
encoder: '
|
| 9 |
cnhubertsoft_gate: 10 # only use with cnhubertsoftfish
|
| 10 |
encoder_sample_rate: 16000
|
| 11 |
encoder_hop_size: 320
|
| 12 |
encoder_out_channels: 256 # 768 if using 'hubertbase768', 'contentvec768' or 'contentvec768l12' encoder
|
| 13 |
-
encoder_ckpt: pretrain/hubert/
|
| 14 |
train_path: data/train # Create a folder named "audio" under this path and put the audio clip in it
|
| 15 |
valid_path: data/val # Create a folder named "audio" under this path and put the audio clip in it
|
| 16 |
model:
|
|
@@ -18,7 +18,7 @@ model:
|
|
| 18 |
n_spk: 1 # max number of different speakers
|
| 19 |
enhancer:
|
| 20 |
type: 'nsf-hifigan'
|
| 21 |
-
ckpt: 'pretrain/nsf_hifigan/model'
|
| 22 |
loss:
|
| 23 |
fft_min: 256
|
| 24 |
fft_max: 2048
|
|
@@ -28,10 +28,10 @@ env:
|
|
| 28 |
expdir: exp/combsub-test
|
| 29 |
gpu_id: 0
|
| 30 |
train:
|
| 31 |
-
num_workers:
|
| 32 |
-
batch_size:
|
| 33 |
cache_all_data: true # Setting this to false saves system memory or GPU memory, but may be slow
|
| 34 |
-
cache_device: '
|
| 35 |
cache_fp16: true
|
| 36 |
epochs: 100000
|
| 37 |
interval_log: 10
|
|
|
|
| 5 |
sampling_rate: 44100
|
| 6 |
block_size: 512 # Equal to hop_length
|
| 7 |
duration: 2 # Audio duration during training, must be less than the duration of the shortest audio clip
|
| 8 |
+
encoder: 'contentvec' # 'hubertsoft', 'hubertbase', 'hubertbase768', 'contentvec', 'contentvec768', 'contentvec768l12' or 'cnhubertsoftfish'
|
| 9 |
cnhubertsoft_gate: 10 # only use with cnhubertsoftfish
|
| 10 |
encoder_sample_rate: 16000
|
| 11 |
encoder_hop_size: 320
|
| 12 |
encoder_out_channels: 256 # 768 if using 'hubertbase768', 'contentvec768' or 'contentvec768l12' encoder
|
| 13 |
+
encoder_ckpt: /workspace/DDSP-SVC/pretrain/hubert/checkpoint_best_legacy_500.pt
|
| 14 |
train_path: data/train # Create a folder named "audio" under this path and put the audio clip in it
|
| 15 |
valid_path: data/val # Create a folder named "audio" under this path and put the audio clip in it
|
| 16 |
model:
|
|
|
|
| 18 |
n_spk: 1 # max number of different speakers
|
| 19 |
enhancer:
|
| 20 |
type: 'nsf-hifigan'
|
| 21 |
+
ckpt: '/workspace/DDSP-SVC/pretrain/nsf_hifigan/model'
|
| 22 |
loss:
|
| 23 |
fft_min: 256
|
| 24 |
fft_max: 2048
|
|
|
|
| 28 |
expdir: exp/combsub-test
|
| 29 |
gpu_id: 0
|
| 30 |
train:
|
| 31 |
+
num_workers: 0 # If your CPU and GPU are both very powerful, setting this to 0 may be faster!
|
| 32 |
+
batch_size: 128
|
| 33 |
cache_all_data: true # Setting this to false saves system memory or GPU memory, but may be slow
|
| 34 |
+
cache_device: 'cuda' # Set to 'cuda' to cache the data in GPU memory; this is the fastest option on a powerful GPU
|
| 35 |
cache_fp16: true
|
| 36 |
epochs: 100000
|
| 37 |
interval_log: 10
|
configs/diffusion.yaml
CHANGED
|
@@ -5,12 +5,12 @@ data:
|
|
| 5 |
sampling_rate: 44100
|
| 6 |
block_size: 512 # Equal to hop_length
|
| 7 |
duration: 2 # Audio duration during training, must be less than the duration of the shortest audio clip
|
| 8 |
-
encoder: '
|
| 9 |
cnhubertsoft_gate: 10 # only use with cnhubertsoftfish
|
| 10 |
encoder_sample_rate: 16000
|
| 11 |
encoder_hop_size: 320
|
| 12 |
encoder_out_channels: 256 # 768 if using 'hubertbase768', 'contentvec768' or 'contentvec768l12' encoder
|
| 13 |
-
encoder_ckpt: pretrain/hubert/
|
| 14 |
train_path: data/train # Create a folder named "audio" under this path and put the audio clip in it
|
| 15 |
valid_path: data/val # Create a folder named "audio" under this path and put the audio clip in it
|
| 16 |
model:
|
|
@@ -23,7 +23,7 @@ model:
|
|
| 23 |
device: cuda
|
| 24 |
vocoder:
|
| 25 |
type: 'nsf-hifigan'
|
| 26 |
-
ckpt: 'pretrain/nsf_hifigan/model'
|
| 27 |
infer:
|
| 28 |
speedup: 10
|
| 29 |
method: 'dpm-solver' # 'pndm' or 'dpm-solver'
|
|
@@ -31,16 +31,16 @@ env:
|
|
| 31 |
expdir: exp/diffusion-test
|
| 32 |
gpu_id: 0
|
| 33 |
train:
|
| 34 |
-
num_workers:
|
| 35 |
-
amp_dtype:
|
| 36 |
-
batch_size:
|
| 37 |
cache_all_data: true # Setting this to false saves system memory or GPU memory, but may be slow
|
| 38 |
-
cache_device: '
|
| 39 |
cache_fp16: true
|
| 40 |
epochs: 100000
|
| 41 |
interval_log: 10
|
| 42 |
interval_val: 2000
|
| 43 |
-
interval_force_save:
|
| 44 |
lr: 0.0002
|
| 45 |
decay_step: 100000
|
| 46 |
gamma: 0.5
|
|
|
|
| 5 |
sampling_rate: 44100
|
| 6 |
block_size: 512 # Equal to hop_length
|
| 7 |
duration: 2 # Audio duration during training, must be less than the duration of the shortest audio clip
|
| 8 |
+
encoder: 'contentvec' # 'hubertsoft', 'hubertbase', 'hubertbase768', 'contentvec', 'contentvec768', 'contentvec768l12' or 'cnhubertsoftfish'
|
| 9 |
cnhubertsoft_gate: 10 # only use with cnhubertsoftfish
|
| 10 |
encoder_sample_rate: 16000
|
| 11 |
encoder_hop_size: 320
|
| 12 |
encoder_out_channels: 256 # 768 if using 'hubertbase768', 'contentvec768' or 'contentvec768l12' encoder
|
| 13 |
+
encoder_ckpt: /workspace/DDSP-SVC/pretrain/hubert/checkpoint_best_legacy_500.pt
|
| 14 |
train_path: data/train # Create a folder named "audio" under this path and put the audio clip in it
|
| 15 |
valid_path: data/val # Create a folder named "audio" under this path and put the audio clip in it
|
| 16 |
model:
|
|
|
|
| 23 |
device: cuda
|
| 24 |
vocoder:
|
| 25 |
type: 'nsf-hifigan'
|
| 26 |
+
ckpt: '/workspace/DDSP-SVC/pretrain/nsf_hifigan/model'
|
| 27 |
infer:
|
| 28 |
speedup: 10
|
| 29 |
method: 'dpm-solver' # 'pndm' or 'dpm-solver'
|
|
|
|
| 31 |
expdir: exp/diffusion-test
|
| 32 |
gpu_id: 0
|
| 33 |
train:
|
| 34 |
+
num_workers: 0 # If your CPU and GPU are both very powerful, setting this to 0 may be faster!
|
| 35 |
+
amp_dtype: fp16 # fp32, fp16 or bf16 (fp16 or bf16 may be faster if it is supported by your gpu)
|
| 36 |
+
batch_size: 96
|
| 37 |
cache_all_data: true # Setting this to false saves system memory or GPU memory, but may be slow
|
| 38 |
+
cache_device: 'cuda' # Set to 'cuda' to cache the data in GPU memory; this is the fastest option on a powerful GPU
|
| 39 |
cache_fp16: true
|
| 40 |
epochs: 100000
|
| 41 |
interval_log: 10
|
| 42 |
interval_val: 2000
|
| 43 |
+
interval_force_save: 10000
|
| 44 |
lr: 0.0002
|
| 45 |
decay_step: 100000
|
| 46 |
gamma: 0.5
|