PluginsKers committed on
Commit
87cf724
·
verified ·
1 Parent(s): d93b2ad

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -20,18 +20,21 @@ library_name: pytorch
20
 
21
  # Convbased
22
 
23
- Gtihub: [https://github.com/Convbased/Convbased-Studio](https://github.com/Convbased/Convbased-Studio)
24
 
25
- 本项目专注于训练高质量的预训练底模,为语音转换任务提供强大的基础模型支持。
26
 
27
 
28
- | 特征提取 | 声码器 | 采样率40k | 采样率48k |
29
  |-----------|--------|-----|-----|
30
  | contentvec | hifigannsf | ❌ | ✅ |
31
  | contentvec | sifigan | ❌ | ✅ |
 
32
  | spin | hifigannsf | ❌ | ✅ |
33
  | spin | sifigan | ❌ | ✅ |
 
34
  | chinese-hubert-base | hifigannsf | ❌ | ✅ |
35
- | chinese-hubert-base | sifigan | 🧱 | 🧱 |
36
 
37
- *致力于推进中文语音合成技术的发展,该底模已用于微调大部分模型于 [Convbased Studio](https://weights.chat/)*
 
 
 
20
 
21
  # Convbased
22
 
23
+ Github: [https://github.com/Convbased/Convbased-Studio](https://github.com/Convbased/Convbased-Studio)
24
 
25
+ This project focuses on training high-quality pre-trained base models, providing a strong foundation for voice conversion tasks.
26
 
27
 
28
+ | Feature Extraction | Vocoder | Sample Rate 40k | Sample Rate 48k |
29
  |-----------|--------|-----|-----|
30
  | contentvec | hifigannsf | ❌ | ✅ |
31
  | contentvec | sifigan | ❌ | ✅ |
32
+ | contentvec | bigvgan | ✅ | ❌ |
33
  | spin | hifigannsf | ❌ | ✅ |
34
  | spin | sifigan | ❌ | ✅ |
35
+ | spin-v2 | bigvgan | 🧱 | ❌ |
36
  | chinese-hubert-base | hifigannsf | ❌ | ✅ |
 
37
 
38
+
39
+ *Training code from [Applio](https://github.com/IAHispano/Applio)*
40
+ *Dedicated to advancing Chinese speech synthesis technology. These base models have been used to fine-tune most of the models at [Convbased Studio](https://weights.chat/).*
contentvec/BigVGAN/D_135810.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57f00324dbc8145f89283d041dbe6f9178c85960499510f8ce2eab6f3b6385a9
3
+ size 857123185
contentvec/BigVGAN/G_135810.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45fcd2f5d8af5655aa327c374268b62672b9516f82f206a2f29e5fa0c358aae2
3
+ size 438608285
contentvec/BigVGAN/config.json ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "process_pids": [
3
+ 4048,
4
+ 4278,
5
+ 9099,
6
+ 10214,
7
+ 11439,
8
+ 12155,
9
+ 14658,
10
+ 1796,
11
+ 1797,
12
+ 1798,
13
+ 5406,
14
+ 5407,
15
+ 5408,
16
+ 5409,
17
+ 5410,
18
+ 5411,
19
+ 14883,
20
+ 14884,
21
+ 14885,
22
+ 14886,
23
+ 14887,
24
+ 14888,
25
+ 49208,
26
+ 49209,
27
+ 49210,
28
+ 49211,
29
+ 49212,
30
+ 49213,
31
+ 57786,
32
+ 57787,
33
+ 57788,
34
+ 57789,
35
+ 57790,
36
+ 57791
37
+ ],
38
+ "train": {
39
+ "log_interval": 200,
40
+ "seed": 1234,
41
+ "learning_rate": 0.0001,
42
+ "betas": [
43
+ 0.8,
44
+ 0.99
45
+ ],
46
+ "eps": 1e-09,
47
+ "lr_decay": 0.999875,
48
+ "segment_size": 12800,
49
+ "c_mel": 45,
50
+ "c_kl": 1.0
51
+ },
52
+ "data": {
53
+ "max_wav_value": 32768.0,
54
+ "sample_rate": 40000,
55
+ "filter_length": 2048,
56
+ "hop_length": 400,
57
+ "win_length": 2048,
58
+ "n_mel_channels": 125,
59
+ "mel_fmin": 0.0,
60
+ "mel_fmax": null
61
+ },
62
+ "model": {
63
+ "inter_channels": 192,
64
+ "hidden_channels": 192,
65
+ "filter_channels": 768,
66
+ "text_enc_hidden_dim": 768,
67
+ "n_heads": 2,
68
+ "n_layers": 6,
69
+ "kernel_size": 3,
70
+ "p_dropout": 0,
71
+ "resblock": "1",
72
+ "resblock_kernel_sizes": [
73
+ 3,
74
+ 7,
75
+ 11
76
+ ],
77
+ "resblock_dilation_sizes": [
78
+ [
79
+ 1,
80
+ 3,
81
+ 5
82
+ ],
83
+ [
84
+ 1,
85
+ 3,
86
+ 5
87
+ ],
88
+ [
89
+ 1,
90
+ 3,
91
+ 5
92
+ ]
93
+ ],
94
+ "upsample_rates": [
95
+ 10,
96
+ 10,
97
+ 2,
98
+ 2
99
+ ],
100
+ "upsample_initial_channel": 512,
101
+ "upsample_kernel_sizes": [
102
+ 16,
103
+ 16,
104
+ 4,
105
+ 4
106
+ ],
107
+ "use_spectral_norm": false,
108
+ "gin_channels": 256,
109
+ "spk_embed_dim": 109
110
+ }
111
+ }