new model test
Browse files
- G_240000.pth +0 -3
- G_321600.pth +0 -3
- D_240000.pth → G_6800.pth +2 -2
- config.json +109 -114
- diffusion/config.yaml +0 -60
- diffusion/model_50000.pt +0 -3
G_240000.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:b3e5c09a20b3f96b887e29b6a31520ede750a82c7e1558fd22d8a2f2e77f3268
|
| 3 |
-
size 542209243
|
|
|
|
|
|
|
|
|
|
|
|
G_321600.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:e606d60c9ff1ff588637c9595c51d6e2e11429b7d71b328e566a72d5420b9248
|
| 3 |
-
size 542209243
|
|
|
|
|
|
|
|
|
|
|
|
D_240000.pth → G_6800.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e989177f59bcbeff27ded3859bd59ce6ad29ca1c4681efb488f4e541aa03511f
|
| 3 |
+
size 542197727
|
config.json
CHANGED
|
@@ -1,116 +1,111 @@
|
|
| 1 |
{
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
"
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
"
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
"
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
"natsu_megumi": 6,
|
| 112 |
-
"tedeza_rize": 7,
|
| 113 |
-
"tippy": 8,
|
| 114 |
-
"ujimatsu_chiya": 9
|
| 115 |
-
}
|
| 116 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 400,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 10000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 12,
|
| 14 |
+
"fp16_run": false,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 10240,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0,
|
| 21 |
+
"use_sr": true,
|
| 22 |
+
"max_speclen": 512,
|
| 23 |
+
"port": "8001",
|
| 24 |
+
"keep_ckpts": 5,
|
| 25 |
+
"all_in_mem": false
|
| 26 |
+
},
|
| 27 |
+
"data": {
|
| 28 |
+
"training_files": "filelists/train.txt",
|
| 29 |
+
"validation_files": "filelists/val.txt",
|
| 30 |
+
"max_wav_value": 32768.0,
|
| 31 |
+
"sampling_rate": 44100,
|
| 32 |
+
"filter_length": 2048,
|
| 33 |
+
"hop_length": 512,
|
| 34 |
+
"win_length": 2048,
|
| 35 |
+
"n_mel_channels": 80,
|
| 36 |
+
"mel_fmin": 0.0,
|
| 37 |
+
"mel_fmax": 22050
|
| 38 |
+
},
|
| 39 |
+
"model": {
|
| 40 |
+
"inter_channels": 192,
|
| 41 |
+
"hidden_channels": 192,
|
| 42 |
+
"filter_channels": 768,
|
| 43 |
+
"n_heads": 2,
|
| 44 |
+
"n_layers": 6,
|
| 45 |
+
"kernel_size": 3,
|
| 46 |
+
"p_dropout": 0.1,
|
| 47 |
+
"resblock": "1",
|
| 48 |
+
"resblock_kernel_sizes": [
|
| 49 |
+
3,
|
| 50 |
+
7,
|
| 51 |
+
11
|
| 52 |
+
],
|
| 53 |
+
"resblock_dilation_sizes": [
|
| 54 |
+
[
|
| 55 |
+
1,
|
| 56 |
+
3,
|
| 57 |
+
5
|
| 58 |
+
],
|
| 59 |
+
[
|
| 60 |
+
1,
|
| 61 |
+
3,
|
| 62 |
+
5
|
| 63 |
+
],
|
| 64 |
+
[
|
| 65 |
+
1,
|
| 66 |
+
3,
|
| 67 |
+
5
|
| 68 |
+
]
|
| 69 |
+
],
|
| 70 |
+
"upsample_rates": [
|
| 71 |
+
8,
|
| 72 |
+
8,
|
| 73 |
+
2,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4,
|
| 83 |
+
4
|
| 84 |
+
],
|
| 85 |
+
"n_layers_q": 3,
|
| 86 |
+
"use_spectral_norm": false,
|
| 87 |
+
"gin_channels": 256,
|
| 88 |
+
"ssl_dim": 256,
|
| 89 |
+
"n_speakers": 10,
|
| 90 |
+
"vocoder_name": "nsf-hifigan",
|
| 91 |
+
"speech_encoder": "vec256l9",
|
| 92 |
+
"speaker_embedding": false,
|
| 93 |
+
"vol_embedding": false,
|
| 94 |
+
"use_depthwise_conv": false,
|
| 95 |
+
"flow_share_parameter": false,
|
| 96 |
+
"use_automatic_f0_prediction": true,
|
| 97 |
+
"use_transformer_flow": false
|
| 98 |
+
},
|
| 99 |
+
"spk": {
|
| 100 |
+
"aoyama_bluemountain": 0,
|
| 101 |
+
"hoto_cocoa": 1,
|
| 102 |
+
"kafuu_chino": 2,
|
| 103 |
+
"kafuu_takahiro": 3,
|
| 104 |
+
"kirima_syaro": 4,
|
| 105 |
+
"natsu_megumi": 5,
|
| 106 |
+
"tedeza_rize": 6,
|
| 107 |
+
"tippy": 7,
|
| 108 |
+
"ujimatsu_chiya": 8,
|
| 109 |
+
"jouga_maya": 9
|
| 110 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
}
|
diffusion/config.yaml
DELETED
|
@@ -1,60 +0,0 @@
|
|
| 1 |
-
data:
|
| 2 |
-
block_size: 512
|
| 3 |
-
cnhubertsoft_gate: 10
|
| 4 |
-
duration: 2
|
| 5 |
-
encoder: vec256l9
|
| 6 |
-
encoder_hop_size: 320
|
| 7 |
-
encoder_out_channels: 256
|
| 8 |
-
encoder_sample_rate: 16000
|
| 9 |
-
extensions:
|
| 10 |
-
- wav
|
| 11 |
-
sampling_rate: 44100
|
| 12 |
-
training_files: filelists/train.txt
|
| 13 |
-
unit_interpolate_mode: nearest
|
| 14 |
-
validation_files: filelists/val.txt
|
| 15 |
-
device: cuda
|
| 16 |
-
env:
|
| 17 |
-
expdir: logs/44k/diffusion
|
| 18 |
-
gpu_id: 0
|
| 19 |
-
infer:
|
| 20 |
-
method: dpm-solver++
|
| 21 |
-
speedup: 10
|
| 22 |
-
model:
|
| 23 |
-
k_step_max: 0
|
| 24 |
-
n_chans: 512
|
| 25 |
-
n_hidden: 256
|
| 26 |
-
n_layers: 20
|
| 27 |
-
n_spk: 10
|
| 28 |
-
timesteps: 1000
|
| 29 |
-
type: Diffusion
|
| 30 |
-
use_pitch_aug: true
|
| 31 |
-
spk:
|
| 32 |
-
aoyama_bluemountain: 0
|
| 33 |
-
hoto_cocoa: 1
|
| 34 |
-
jouga_maya: 2
|
| 35 |
-
kafuu_chino: 3
|
| 36 |
-
kafuu_takahiro: 4
|
| 37 |
-
kirima_syaro: 5
|
| 38 |
-
natsu_megumi: 6
|
| 39 |
-
tedeza_rize: 7
|
| 40 |
-
tippy: 8
|
| 41 |
-
ujimatsu_chiya: 9
|
| 42 |
-
train:
|
| 43 |
-
amp_dtype: fp32
|
| 44 |
-
batch_size: 48
|
| 45 |
-
cache_all_data: true
|
| 46 |
-
cache_device: cpu
|
| 47 |
-
cache_fp16: true
|
| 48 |
-
decay_step: 100000
|
| 49 |
-
epochs: 100000
|
| 50 |
-
gamma: 0.5
|
| 51 |
-
interval_force_save: 5000
|
| 52 |
-
interval_log: 10
|
| 53 |
-
interval_val: 2000
|
| 54 |
-
lr: 0.0001
|
| 55 |
-
num_workers: 4
|
| 56 |
-
save_opt: false
|
| 57 |
-
weight_decay: 0
|
| 58 |
-
vocoder:
|
| 59 |
-
ckpt: pretrain/nsf_hifigan/model
|
| 60 |
-
type: nsf-hifigan
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
diffusion/model_50000.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:273cf30849f2c819c5097d26d74f7d62b622e2deef0e91e1412e0e94bb8f4260
|
| 3 |
-
size 220380041
|
|
|
|
|
|
|
|
|
|
|
|