Upload 3 files
Browse files- config.yaml +74 -0
- model_0.pt +3 -0
- spk_info.npz +3 -0
config.yaml
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
data:
|
| 2 |
+
block_size: 512
|
| 3 |
+
dataset_path: ../datasets/vctk-partial
|
| 4 |
+
duration: 1.8
|
| 5 |
+
encoder: dpwavlmbase
|
| 6 |
+
encoder_ckpt: models/pretrained/dphubert/DPWavLM-sp0.75.pth
|
| 7 |
+
encoder_hop_size: 320
|
| 8 |
+
encoder_out_channels: 768
|
| 9 |
+
encoder_sample_rate: 16000
|
| 10 |
+
extensions:
|
| 11 |
+
- wav
|
| 12 |
+
f0_extractor: rmvpe
|
| 13 |
+
f0_max: 1200
|
| 14 |
+
f0_min: 65
|
| 15 |
+
sampling_rate: 44100
|
| 16 |
+
spk_embed_channels: 256
|
| 17 |
+
spk_embed_encoder: pyannote.audio
|
| 18 |
+
spk_embed_encoder_ckpt: ./models/pretrained/pyannote.audio/wespeaker-voxceleb-resnet34-LM/pytorch_model.bin
|
| 19 |
+
spk_embed_encoder_sample_rate: 16000
|
| 20 |
+
volume_window_size: 8
|
| 21 |
+
device: cuda
|
| 22 |
+
env:
|
| 23 |
+
expdir: ../datasets/exp/vctk-partial
|
| 24 |
+
gpu_id: 0
|
| 25 |
+
loss:
|
| 26 |
+
beta: 0.8
|
| 27 |
+
fft_max: 2048
|
| 28 |
+
fft_min: 256
|
| 29 |
+
n_scale: 4
|
| 30 |
+
overlap: 0.5
|
| 31 |
+
use_dual_scale: false
|
| 32 |
+
use_dual_scale_log_freq: true
|
| 33 |
+
model:
|
| 34 |
+
f0_input_variance: 0.0
|
| 35 |
+
f0_offset_size_downsamples: 8
|
| 36 |
+
harmonic_env_size_downsamples: 8
|
| 37 |
+
no_use_embed_conv: false
|
| 38 |
+
noise_env_size_downsamples: 8
|
| 39 |
+
noise_seed: 289
|
| 40 |
+
noise_to_harmonic_phase: true
|
| 41 |
+
type: CombSubMinimumNoisedPhase
|
| 42 |
+
units_hidden_channels: 256
|
| 43 |
+
units_layers:
|
| 44 |
+
- - 10
|
| 45 |
+
- 11
|
| 46 |
+
use_f0_offset: true
|
| 47 |
+
use_harmonic_env: false
|
| 48 |
+
use_noise_env: true
|
| 49 |
+
use_speaker_embed: true
|
| 50 |
+
win_length: 2048
|
| 51 |
+
train:
|
| 52 |
+
amp_dtype: fp32
|
| 53 |
+
batch_size: 48
|
| 54 |
+
cache_all_data: true
|
| 55 |
+
cache_device: cuda
|
| 56 |
+
cache_fp16: true
|
| 57 |
+
epochs: 50000
|
| 58 |
+
frame_hop_random_max: 64
|
| 59 |
+
frame_hop_random_min: 32
|
| 60 |
+
interval_log: 10
|
| 61 |
+
interval_val: 2000
|
| 62 |
+
loss_variation: 0.1
|
| 63 |
+
low_similar_loss_variation: 0.7
|
| 64 |
+
lr: 0.0005
|
| 65 |
+
num_workers: 2
|
| 66 |
+
only_u2c_stack: false
|
| 67 |
+
save_opt: false
|
| 68 |
+
sched_cooldown: 2
|
| 69 |
+
sched_factor: 0.5
|
| 70 |
+
sched_min_lr: 3.0e-06
|
| 71 |
+
sched_patience: 30
|
| 72 |
+
sched_threshold: 1.0e-05
|
| 73 |
+
sched_threshold_mode: rel
|
| 74 |
+
weight_decay: 0
|
model_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f967f9047c1bbc68bef4f9919745309d0800603de73e427827487adc0d3cf0d
|
| 3 |
+
size 9591794
|
spk_info.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7bb67178e37355c9efa7b8ecf8611bb365da4248080d2249756e863767bb97e9
|
| 3 |
+
size 153545
|