Asankilp committed on
Commit
adae6ec
·
1 Parent(s): e365095

new model test

Browse files
G_240000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3e5c09a20b3f96b887e29b6a31520ede750a82c7e1558fd22d8a2f2e77f3268
3
- size 542209243
 
 
 
 
G_321600.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e606d60c9ff1ff588637c9595c51d6e2e11429b7d71b328e566a72d5420b9248
3
- size 542209243
 
 
 
 
D_240000.pth → G_6800.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:988208e0630759cdbb8d60e65eff878cc8709987f841697efa1fdcb36f9e3efe
3
- size 561099207
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e989177f59bcbeff27ded3859bd59ce6ad29ca1c4681efb488f4e541aa03511f
3
+ size 542197727
config.json CHANGED
@@ -1,116 +1,111 @@
1
  {
2
- "train": {
3
- "log_interval": 200,
4
- "eval_interval": 800,
5
- "seed": 1234,
6
- "epochs": 10000,
7
- "learning_rate": 0.0001,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 6,
14
- "fp16_run": false,
15
- "half_type": "fp16",
16
- "lr_decay": 0.999875,
17
- "segment_size": 10240,
18
- "init_lr_ratio": 1,
19
- "warmup_epochs": 0,
20
- "c_mel": 45,
21
- "c_kl": 1.0,
22
- "use_sr": true,
23
- "max_speclen": 512,
24
- "port": "8001",
25
- "keep_ckpts": 3,
26
- "all_in_mem": false,
27
- "vol_aug": false
28
- },
29
- "data": {
30
- "training_files": "filelists/train.txt",
31
- "validation_files": "filelists/val.txt",
32
- "max_wav_value": 32768.0,
33
- "sampling_rate": 44100,
34
- "filter_length": 2048,
35
- "hop_length": 512,
36
- "win_length": 2048,
37
- "n_mel_channels": 80,
38
- "mel_fmin": 0.0,
39
- "mel_fmax": 22050,
40
- "unit_interpolate_mode": "nearest"
41
- },
42
- "model": {
43
- "inter_channels": 192,
44
- "hidden_channels": 192,
45
- "filter_channels": 768,
46
- "n_heads": 2,
47
- "n_layers": 6,
48
- "kernel_size": 3,
49
- "p_dropout": 0.1,
50
- "resblock": "1",
51
- "resblock_kernel_sizes": [
52
- 3,
53
- 7,
54
- 11
55
- ],
56
- "resblock_dilation_sizes": [
57
- [
58
- 1,
59
- 3,
60
- 5
61
- ],
62
- [
63
- 1,
64
- 3,
65
- 5
66
- ],
67
- [
68
- 1,
69
- 3,
70
- 5
71
- ]
72
- ],
73
- "upsample_rates": [
74
- 8,
75
- 8,
76
- 2,
77
- 2,
78
- 2
79
- ],
80
- "upsample_initial_channel": 512,
81
- "upsample_kernel_sizes": [
82
- 16,
83
- 16,
84
- 4,
85
- 4,
86
- 4
87
- ],
88
- "n_layers_q": 3,
89
- "n_layers_trans_flow": 3,
90
- "n_flow_layer": 4,
91
- "use_spectral_norm": false,
92
- "gin_channels": 256,
93
- "ssl_dim": 256,
94
- "n_speakers": 10,
95
- "vocoder_name": "nsf-hifigan",
96
- "speech_encoder": "vec256l9",
97
- "speaker_embedding": false,
98
- "vol_embedding": false,
99
- "use_depthwise_conv": false,
100
- "flow_share_parameter": false,
101
- "use_automatic_f0_prediction": true,
102
- "use_transformer_flow": false
103
- },
104
- "spk": {
105
- "aoyama_bluemountain": 0,
106
- "hoto_cocoa": 1,
107
- "jouga_maya": 2,
108
- "kafuu_chino": 3,
109
- "kafuu_takahiro": 4,
110
- "kirima_syaro": 5,
111
- "natsu_megumi": 6,
112
- "tedeza_rize": 7,
113
- "tippy": 8,
114
- "ujimatsu_chiya": 9
115
- }
116
  }
 
1
  {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 400,
5
+ "seed": 1234,
6
+ "epochs": 10000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 12,
14
+ "fp16_run": false,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 10240,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0,
21
+ "use_sr": true,
22
+ "max_speclen": 512,
23
+ "port": "8001",
24
+ "keep_ckpts": 5,
25
+ "all_in_mem": false
26
+ },
27
+ "data": {
28
+ "training_files": "filelists/train.txt",
29
+ "validation_files": "filelists/val.txt",
30
+ "max_wav_value": 32768.0,
31
+ "sampling_rate": 44100,
32
+ "filter_length": 2048,
33
+ "hop_length": 512,
34
+ "win_length": 2048,
35
+ "n_mel_channels": 80,
36
+ "mel_fmin": 0.0,
37
+ "mel_fmax": 22050
38
+ },
39
+ "model": {
40
+ "inter_channels": 192,
41
+ "hidden_channels": 192,
42
+ "filter_channels": 768,
43
+ "n_heads": 2,
44
+ "n_layers": 6,
45
+ "kernel_size": 3,
46
+ "p_dropout": 0.1,
47
+ "resblock": "1",
48
+ "resblock_kernel_sizes": [
49
+ 3,
50
+ 7,
51
+ 11
52
+ ],
53
+ "resblock_dilation_sizes": [
54
+ [
55
+ 1,
56
+ 3,
57
+ 5
58
+ ],
59
+ [
60
+ 1,
61
+ 3,
62
+ 5
63
+ ],
64
+ [
65
+ 1,
66
+ 3,
67
+ 5
68
+ ]
69
+ ],
70
+ "upsample_rates": [
71
+ 8,
72
+ 8,
73
+ 2,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4,
83
+ 4
84
+ ],
85
+ "n_layers_q": 3,
86
+ "use_spectral_norm": false,
87
+ "gin_channels": 256,
88
+ "ssl_dim": 256,
89
+ "n_speakers": 10,
90
+ "vocoder_name": "nsf-hifigan",
91
+ "speech_encoder": "vec256l9",
92
+ "speaker_embedding": false,
93
+ "vol_embedding": false,
94
+ "use_depthwise_conv": false,
95
+ "flow_share_parameter": false,
96
+ "use_automatic_f0_prediction": true,
97
+ "use_transformer_flow": false
98
+ },
99
+ "spk": {
100
+ "aoyama_bluemountain": 0,
101
+ "hoto_cocoa": 1,
102
+ "kafuu_chino": 2,
103
+ "kafuu_takahiro": 3,
104
+ "kirima_syaro": 4,
105
+ "natsu_megumi": 5,
106
+ "tedeza_rize": 6,
107
+ "tippy": 7,
108
+ "ujimatsu_chiya": 8,
109
+ "jouga_maya": 9
110
+ }
 
 
 
 
 
111
  }
diffusion/config.yaml DELETED
@@ -1,60 +0,0 @@
1
- data:
2
- block_size: 512
3
- cnhubertsoft_gate: 10
4
- duration: 2
5
- encoder: vec256l9
6
- encoder_hop_size: 320
7
- encoder_out_channels: 256
8
- encoder_sample_rate: 16000
9
- extensions:
10
- - wav
11
- sampling_rate: 44100
12
- training_files: filelists/train.txt
13
- unit_interpolate_mode: nearest
14
- validation_files: filelists/val.txt
15
- device: cuda
16
- env:
17
- expdir: logs/44k/diffusion
18
- gpu_id: 0
19
- infer:
20
- method: dpm-solver++
21
- speedup: 10
22
- model:
23
- k_step_max: 0
24
- n_chans: 512
25
- n_hidden: 256
26
- n_layers: 20
27
- n_spk: 10
28
- timesteps: 1000
29
- type: Diffusion
30
- use_pitch_aug: true
31
- spk:
32
- aoyama_bluemountain: 0
33
- hoto_cocoa: 1
34
- jouga_maya: 2
35
- kafuu_chino: 3
36
- kafuu_takahiro: 4
37
- kirima_syaro: 5
38
- natsu_megumi: 6
39
- tedeza_rize: 7
40
- tippy: 8
41
- ujimatsu_chiya: 9
42
- train:
43
- amp_dtype: fp32
44
- batch_size: 48
45
- cache_all_data: true
46
- cache_device: cpu
47
- cache_fp16: true
48
- decay_step: 100000
49
- epochs: 100000
50
- gamma: 0.5
51
- interval_force_save: 5000
52
- interval_log: 10
53
- interval_val: 2000
54
- lr: 0.0001
55
- num_workers: 4
56
- save_opt: false
57
- weight_decay: 0
58
- vocoder:
59
- ckpt: pretrain/nsf_hifigan/model
60
- type: nsf-hifigan
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
diffusion/model_50000.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:273cf30849f2c819c5097d26d74f7d62b622e2deef0e91e1412e0e94bb8f4260
3
- size 220380041