Little-W committed on
Commit
ef8c536
·
1 Parent(s): 7d0ca7e
app.py CHANGED
@@ -28,7 +28,7 @@ class VitsGradio:
28
  with gr.Tab("SelectModel"):
29
  with gr.Column():
30
  modelstrs = gr.Dropdown(label = "模型", choices = self.modelPaths, value = self.modelPaths[0], type = "value")
31
- devicestrs = gr.Dropdown(label = "设备", choices = ["cpu"], value = "cpu", type = "value")
32
  btnMod = gr.Button("载入模型")
33
  btnMod.click(self.loadModel, inputs=[modelstrs,devicestrs], outputs = [self.dsid,self.VoiceConversion])
34
 
 
28
  with gr.Tab("SelectModel"):
29
  with gr.Column():
30
  modelstrs = gr.Dropdown(label = "模型", choices = self.modelPaths, value = self.modelPaths[0], type = "value")
31
+ devicestrs = gr.Dropdown(label = "设备", choices = ["cpu","cuda"], value = "cpu", type = "value")
32
  btnMod = gr.Button("载入模型")
33
  btnMod.click(self.loadModel, inputs=[modelstrs,devicestrs], outputs = [self.dsid,self.VoiceConversion])
34
 
checkpoints/atri/config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "train": {
3
  "log_interval": 200,
4
- "eval_interval": 1000,
5
  "seed": 1234,
6
  "epochs": 10000,
7
  "learning_rate": 0.0001,
@@ -10,29 +10,34 @@
10
  0.99
11
  ],
12
  "eps": 1e-09,
13
- "batch_size": 12,
14
  "fp16_run": false,
 
15
  "lr_decay": 0.999875,
16
- "segment_size": 17920,
17
  "init_lr_ratio": 1,
18
  "warmup_epochs": 0,
19
  "c_mel": 45,
20
  "c_kl": 1.0,
21
  "use_sr": true,
22
- "max_speclen": 384,
23
- "port": "8001"
 
 
 
24
  },
25
  "data": {
26
  "training_files": "filelists/train.txt",
27
  "validation_files": "filelists/val.txt",
28
  "max_wav_value": 32768.0,
29
- "sampling_rate": 32000,
30
- "filter_length": 1280,
31
- "hop_length": 320,
32
- "win_length": 1280,
33
  "n_mel_channels": 80,
34
  "mel_fmin": 0.0,
35
- "mel_fmax": null
 
36
  },
37
  "model": {
38
  "inter_channels": 192,
@@ -66,8 +71,9 @@
66
  ]
67
  ],
68
  "upsample_rates": [
69
- 10,
70
  8,
 
 
71
  2,
72
  2
73
  ],
@@ -76,15 +82,24 @@
76
  16,
77
  16,
78
  4,
 
79
  4
80
  ],
81
  "n_layers_q": 3,
 
82
  "use_spectral_norm": false,
83
- "gin_channels": 256,
84
- "ssl_dim": 256,
85
- "n_speakers": 2
 
 
 
 
 
 
 
86
  },
87
  "spk": {
88
- "speaker0": 0
89
  }
90
  }
 
1
  {
2
  "train": {
3
  "log_interval": 200,
4
+ "eval_interval": 800,
5
  "seed": 1234,
6
  "epochs": 10000,
7
  "learning_rate": 0.0001,
 
10
  0.99
11
  ],
12
  "eps": 1e-09,
13
+ "batch_size": 6,
14
  "fp16_run": false,
15
+ "half_type": "fp16",
16
  "lr_decay": 0.999875,
17
+ "segment_size": 10240,
18
  "init_lr_ratio": 1,
19
  "warmup_epochs": 0,
20
  "c_mel": 45,
21
  "c_kl": 1.0,
22
  "use_sr": true,
23
+ "max_speclen": 512,
24
+ "port": "8001",
25
+ "keep_ckpts": 3,
26
+ "all_in_mem": false,
27
+ "vol_aug": false
28
  },
29
  "data": {
30
  "training_files": "filelists/train.txt",
31
  "validation_files": "filelists/val.txt",
32
  "max_wav_value": 32768.0,
33
+ "sampling_rate": 44100,
34
+ "filter_length": 2048,
35
+ "hop_length": 512,
36
+ "win_length": 2048,
37
  "n_mel_channels": 80,
38
  "mel_fmin": 0.0,
39
+ "mel_fmax": 22050,
40
+ "unit_interpolate_mode": "nearest"
41
  },
42
  "model": {
43
  "inter_channels": 192,
 
71
  ]
72
  ],
73
  "upsample_rates": [
 
74
  8,
75
+ 8,
76
+ 2,
77
  2,
78
  2
79
  ],
 
82
  16,
83
  16,
84
  4,
85
+ 4,
86
  4
87
  ],
88
  "n_layers_q": 3,
89
+ "n_flow_layer": 4,
90
  "use_spectral_norm": false,
91
+ "gin_channels": 768,
92
+ "ssl_dim": 768,
93
+ "n_speakers": 1,
94
+ "vocoder_name": "nsf-hifigan",
95
+ "speech_encoder": "vec768l12",
96
+ "speaker_embedding": false,
97
+ "vol_embedding": false,
98
+ "use_depthwise_conv": false,
99
+ "flow_share_parameter": false,
100
+ "use_automatic_f0_prediction": true
101
  },
102
  "spk": {
103
+ "ATRI": 0
104
  }
105
  }
checkpoints/atri/model.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00a949e99b19d2eb68dc8e040f276c6a3cff22a1d53d5ccbd469e7d62da75a6a
3
- size 699505437
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee5df8f989316893e034c2a8f144ccaa9b0a758dbe3e4f154dc98f24d3459147
3
+ size 627905373