Spaces:
Runtime error
Runtime error
Roi Feng
committed on
Commit
·
7171797
1
Parent(s):
2e9bf0c
new 2.2 HANSER model V3
Browse files- Data/config.json +99 -0
- Data/models/G_18500.pth +3 -0
- webui.py → app.py +17 -1
- config.yml +177 -0
Data/config.json
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 42,
|
| 6 |
+
"epochs": 1000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 12,
|
| 14 |
+
"fp16_run": false,
|
| 15 |
+
"lr_decay": 0.99995,
|
| 16 |
+
"segment_size": 16384,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0,
|
| 21 |
+
"skip_optimizer": true,
|
| 22 |
+
"freeze_ZH_bert": false,
|
| 23 |
+
"freeze_JP_bert": false,
|
| 24 |
+
"freeze_EN_bert": false
|
| 25 |
+
},
|
| 26 |
+
"data": {
|
| 27 |
+
"training_files": "Data/filelists/train.list",
|
| 28 |
+
"validation_files": "Data/filelists/val.list",
|
| 29 |
+
"max_wav_value": 32768.0,
|
| 30 |
+
"sampling_rate": 44100,
|
| 31 |
+
"filter_length": 2048,
|
| 32 |
+
"hop_length": 512,
|
| 33 |
+
"win_length": 2048,
|
| 34 |
+
"n_mel_channels": 128,
|
| 35 |
+
"mel_fmin": 0.0,
|
| 36 |
+
"mel_fmax": null,
|
| 37 |
+
"add_blank": true,
|
| 38 |
+
"n_speakers": 1,
|
| 39 |
+
"cleaned_text": true,
|
| 40 |
+
"spk2id": {
|
| 41 |
+
"hanser": 0
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"model": {
|
| 45 |
+
"use_spk_conditioned_encoder": true,
|
| 46 |
+
"use_noise_scaled_mas": true,
|
| 47 |
+
"use_mel_posterior_encoder": false,
|
| 48 |
+
"use_duration_discriminator": true,
|
| 49 |
+
"inter_channels": 192,
|
| 50 |
+
"hidden_channels": 192,
|
| 51 |
+
"filter_channels": 768,
|
| 52 |
+
"n_heads": 2,
|
| 53 |
+
"n_layers": 6,
|
| 54 |
+
"kernel_size": 3,
|
| 55 |
+
"p_dropout": 0.1,
|
| 56 |
+
"resblock": "1",
|
| 57 |
+
"resblock_kernel_sizes": [
|
| 58 |
+
3,
|
| 59 |
+
7,
|
| 60 |
+
11
|
| 61 |
+
],
|
| 62 |
+
"resblock_dilation_sizes": [
|
| 63 |
+
[
|
| 64 |
+
1,
|
| 65 |
+
3,
|
| 66 |
+
5
|
| 67 |
+
],
|
| 68 |
+
[
|
| 69 |
+
1,
|
| 70 |
+
3,
|
| 71 |
+
5
|
| 72 |
+
],
|
| 73 |
+
[
|
| 74 |
+
1,
|
| 75 |
+
3,
|
| 76 |
+
5
|
| 77 |
+
]
|
| 78 |
+
],
|
| 79 |
+
"upsample_rates": [
|
| 80 |
+
8,
|
| 81 |
+
8,
|
| 82 |
+
2,
|
| 83 |
+
2,
|
| 84 |
+
2
|
| 85 |
+
],
|
| 86 |
+
"upsample_initial_channel": 512,
|
| 87 |
+
"upsample_kernel_sizes": [
|
| 88 |
+
16,
|
| 89 |
+
16,
|
| 90 |
+
8,
|
| 91 |
+
2,
|
| 92 |
+
2
|
| 93 |
+
],
|
| 94 |
+
"n_layers_q": 3,
|
| 95 |
+
"use_spectral_norm": false,
|
| 96 |
+
"gin_channels": 256
|
| 97 |
+
},
|
| 98 |
+
"version": "2.2"
|
| 99 |
+
}
|
Data/models/G_18500.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae74457f32ed7bfc1018140f13b752ebe72784d5c5dfa66ec98fa274e83bf694
|
| 3 |
+
size 749765494
|
webui.py → app.py
RENAMED
|
@@ -394,6 +394,14 @@ if __name__ == "__main__":
|
|
| 394 |
with gr.Blocks() as app:
|
| 395 |
with gr.Row():
|
| 396 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
text = gr.TextArea(
|
| 398 |
label="输入文本内容",
|
| 399 |
placeholder="""
|
|
@@ -405,8 +413,8 @@ if __name__ == "__main__":
|
|
| 405 |
...
|
| 406 |
另外,所有的语言选项都可以用'|'分割长段实现分句生成。
|
| 407 |
""",
|
|
|
|
| 408 |
)
|
| 409 |
-
trans = gr.Button("中翻日", variant="primary")
|
| 410 |
slicer = gr.Button("快速切分", variant="primary")
|
| 411 |
speaker = gr.Dropdown(
|
| 412 |
choices=speakers, value=speakers[0], label="Speaker"
|
|
@@ -474,6 +482,14 @@ if __name__ == "__main__":
|
|
| 474 |
# show_download_button=False,
|
| 475 |
# value=os.path.abspath("./img/参数说明.png"),
|
| 476 |
# )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 477 |
btn.click(
|
| 478 |
tts_fn,
|
| 479 |
inputs=[
|
|
|
|
| 394 |
with gr.Blocks() as app:
|
| 395 |
with gr.Row():
|
| 396 |
with gr.Column():
|
| 397 |
+
gr.Markdown(value="""
|
| 398 |
+
🤖 【AI Hanser】在线语音合成 Bert-Vits2 V3.0 🤖\n
|
| 399 |
+
📝 作者:Rayzggz 📰博客 https://roi.moe 📺B站 https://space.bilibili.com/10501326 📝\n
|
| 400 |
+
🎤 声音来源:Hanser https://space.bilibili.com/11073 🎤\n
|
| 401 |
+
🔗 Bert-VITS2:https://github.com/fishaudio/Bert-VITS2 🔗\n
|
| 402 |
+
✅ 使用本模型请遵守中华人民共和国和美利坚合众国法律 ✅\n
|
| 403 |
+
🏷️ 使用基于本模型的所有生成内容均需标注「使用Bert-VITS2 AI生成」、「本项目地址」、「作者名称」和「声音来源」 🏷️\n
|
| 404 |
+
""")
|
| 405 |
text = gr.TextArea(
|
| 406 |
label="输入文本内容",
|
| 407 |
placeholder="""
|
|
|
|
| 413 |
...
|
| 414 |
另外,所有的语言选项都可以用'|'分割长段实现分句生成。
|
| 415 |
""",
|
| 416 |
+
value="大家好,我是憨色,今天给大家看看我的摩托车车",
|
| 417 |
)
|
|
|
|
| 418 |
slicer = gr.Button("快速切分", variant="primary")
|
| 419 |
speaker = gr.Dropdown(
|
| 420 |
choices=speakers, value=speakers[0], label="Speaker"
|
|
|
|
| 482 |
# show_download_button=False,
|
| 483 |
# value=os.path.abspath("./img/参数说明.png"),
|
| 484 |
# )
|
| 485 |
+
|
| 486 |
+
gr.Markdown(value="""
|
| 487 |
+
👏 鸣谢: 👏\n
|
| 488 |
+
👤 领航员未鸟 https://space.bilibili.com/2403955 👤\n
|
| 489 |
+
👤 怎么好就怎么来 https://space.bilibili.com/259582714 👤\n
|
| 490 |
+
🧠 Google Colab https://colab.research.google.com/ 🧠\n
|
| 491 |
+
📧 如果你是“Hanser”,并且希望对此模型主张权利,请通过上方“作者”部分的联系方式联系,我将积极配合处理。📧 \n
|
| 492 |
+
""")
|
| 493 |
btn.click(
|
| 494 |
tts_fn,
|
| 495 |
inputs=[
|
config.yml
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 全局配置
|
| 2 |
+
# 对于希望在同一时间使用多个配置文件的情况,例如两个GPU同时跑两个训练集:通过环境变量指定配置文件,不指定则默认为./config.yml
|
| 3 |
+
|
| 4 |
+
# 拟提供通用路径配置,统一存放数据,避免数据放得很乱
|
| 5 |
+
# 每个数据集与其对应的模型存放至统一路径下,后续所有的路径配置均为相对于 dataset_path 的路径
|
| 6 |
+
# 不填或者填空则路径为相对于项目根目录的路径
|
| 7 |
+
dataset_path: "Data/"
|
| 8 |
+
|
| 9 |
+
# 模型镜像源,默认huggingface,使用openi镜像源需指定openi_token
|
| 10 |
+
mirror: ""
|
| 11 |
+
openi_token: "" # openi token
|
| 12 |
+
|
| 13 |
+
# resample 音频重采样配置
|
| 14 |
+
# 注意, “:” 后需要加空格
|
| 15 |
+
resample:
|
| 16 |
+
# 目标重采样率
|
| 17 |
+
sampling_rate: 44100
|
| 18 |
+
# 音频文件输入路径,重采样会将该路径下所有.wav音频文件重采样
|
| 19 |
+
# 请填入相对于 dataset_path 的相对路径
|
| 20 |
+
in_dir: "audios/raw" # 相对于根目录的路径为 /dataset_path/in_dir
|
| 21 |
+
# 音频文件重采样后输出路径
|
| 22 |
+
out_dir: "audios/wavs"
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# preprocess_text 数据集预处理相关配置
|
| 26 |
+
# 注意, “:” 后需要加空格
|
| 27 |
+
preprocess_text:
|
| 28 |
+
# 原始文本文件路径,文本格式应为{wav_path}|{speaker_name}|{language}|{text}。
|
| 29 |
+
transcription_path: "filelists/hanser.list"
|
| 30 |
+
# 数据清洗后文本路径,可以不填。不填则将在原始文本目录生成
|
| 31 |
+
cleaned_path: ""
|
| 32 |
+
# 训练集路径
|
| 33 |
+
train_path: "filelists/train.list"
|
| 34 |
+
# 验证集路径
|
| 35 |
+
val_path: "filelists/val.list"
|
| 36 |
+
# 配置文件路径
|
| 37 |
+
config_path: "config.json"
|
| 38 |
+
# 每个语言的验证集条数
|
| 39 |
+
val_per_lang: 4
|
| 40 |
+
# 验证集最大条数,多于的会被截断并放到训练集中
|
| 41 |
+
max_val_total: 12
|
| 42 |
+
# 是否进行数据清洗
|
| 43 |
+
clean: true
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# bert_gen 相关配置
|
| 47 |
+
# 注意, “:” 后需要加空格
|
| 48 |
+
bert_gen:
|
| 49 |
+
# 训练数据集配置文件路径
|
| 50 |
+
config_path: "config.json"
|
| 51 |
+
# 并行数
|
| 52 |
+
num_processes: 4
|
| 53 |
+
# 使用设备:可选项 "cuda" 显卡推理,"cpu" cpu推理
|
| 54 |
+
# 该选项同时决定了get_bert_feature的默认设备
|
| 55 |
+
device: "cuda"
|
| 56 |
+
# 使用多卡推理
|
| 57 |
+
use_multi_device: false
|
| 58 |
+
|
| 59 |
+
# emo_gen 相关配置
|
| 60 |
+
# 注意, “:” 后需要加空格
|
| 61 |
+
emo_gen:
|
| 62 |
+
# 训练数据集配置文件路径
|
| 63 |
+
config_path: "config.json"
|
| 64 |
+
# 并行数
|
| 65 |
+
num_processes: 4
|
| 66 |
+
# 使用设备:可选项 "cuda" 显卡推理,"cpu" cpu推理
|
| 67 |
+
device: "cuda"
|
| 68 |
+
# 使用多卡推理
|
| 69 |
+
use_multi_device: false
|
| 70 |
+
|
| 71 |
+
# train 训练配置
|
| 72 |
+
# 注意, “:” 后需要加空格
|
| 73 |
+
train_ms:
|
| 74 |
+
env:
|
| 75 |
+
MASTER_ADDR: "localhost"
|
| 76 |
+
MASTER_PORT: 10086
|
| 77 |
+
WORLD_SIZE: 1
|
| 78 |
+
LOCAL_RANK: 0
|
| 79 |
+
RANK: 0
|
| 80 |
+
# 可以填写任意名的环境变量
|
| 81 |
+
# THE_ENV_VAR_YOU_NEED_TO_USE: "1234567"
|
| 82 |
+
# 底模设置
|
| 83 |
+
base:
|
| 84 |
+
use_base_model: false
|
| 85 |
+
repo_id: "Stardust_minus/Bert-VITS2"
|
| 86 |
+
model_image: "Bert-VITS2_2.2-Clap底模" # openi网页的模型名
|
| 87 |
+
# 训练模型存储目录:与旧版本的区别,原先模型是存放在logs/model_name下的,现在改为统一存放在Data/你的数据集/models下
|
| 88 |
+
model: "models"
|
| 89 |
+
# 配置文件路径
|
| 90 |
+
config_path: "config.json"
|
| 91 |
+
# 训练使用的worker,不建议超过CPU核心数
|
| 92 |
+
num_workers: 16
|
| 93 |
+
# 关闭此项可以节约接近50%的磁盘空间,但是可能导致实际训练速度变慢和更高的CPU使用率。
|
| 94 |
+
spec_cache: True
|
| 95 |
+
# 保存的检查点数量,多于此数目的权重会被删除来节省空间。
|
| 96 |
+
keep_ckpts: 8
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
# webui webui配置
|
| 100 |
+
# 注意, “:” 后需要加空格
|
| 101 |
+
webui:
|
| 102 |
+
# 推理设备
|
| 103 |
+
device: "cuda"
|
| 104 |
+
# 模型路径
|
| 105 |
+
model: "models/G_18500.pth"
|
| 106 |
+
# 配置文件路径
|
| 107 |
+
config_path: "config.json"
|
| 108 |
+
# 端口号
|
| 109 |
+
port: 7860
|
| 110 |
+
# 是否公开部署,对外网开放
|
| 111 |
+
share: false
|
| 112 |
+
# 是否开启debug模式
|
| 113 |
+
debug: false
|
| 114 |
+
# 语种识别库,可选langid, fastlid
|
| 115 |
+
language_identification_library: "langid"
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
# server-fastapi配置
|
| 119 |
+
# 注意, “:” 后需要加空格
|
| 120 |
+
# 注意,本配置下的所有配置均为相对于根目录的路径
|
| 121 |
+
server:
|
| 122 |
+
# 端口号
|
| 123 |
+
port: 5000
|
| 124 |
+
# 模型默认使用设备:但是当前并没有实现这个配置。
|
| 125 |
+
device: "cuda"
|
| 126 |
+
# 需要加载的所有模型的配置,可以填多个模型,也可以不填模型,等网页成功后手动加载模型
|
| 127 |
+
# 不加载模型的配置格式:删除默认给的两个模型配置,给models赋值 [ ],也就是空列表。参考模型2的speakers 即 models: [ ]
|
| 128 |
+
# 注意,所有模型都必须正确配置model与config的路径,空路径会导致加载错误。
|
| 129 |
+
# 也可以不填模型,等网页加载成功后手动填写models。
|
| 130 |
+
models:
|
| 131 |
+
- # 模型的路径
|
| 132 |
+
model: ""
|
| 133 |
+
# 模型config.json的路径
|
| 134 |
+
config: ""
|
| 135 |
+
# 模型使用设备,若填写则会覆盖默认配置
|
| 136 |
+
device: "cuda"
|
| 137 |
+
# 模型默认使用的语言
|
| 138 |
+
language: "ZH"
|
| 139 |
+
# 模型人物默认参数
|
| 140 |
+
# 不必填写所有人物,不填的使用默认值
|
| 141 |
+
# 暂时不用填写,当前尚未实现按人区分配置
|
| 142 |
+
speakers:
|
| 143 |
+
- speaker: "科比"
|
| 144 |
+
sdp_ratio: 0.2
|
| 145 |
+
noise_scale: 0.6
|
| 146 |
+
noise_scale_w: 0.8
|
| 147 |
+
length_scale: 1
|
| 148 |
+
- speaker: "五条悟"
|
| 149 |
+
sdp_ratio: 0.3
|
| 150 |
+
noise_scale: 0.7
|
| 151 |
+
noise_scale_w: 0.8
|
| 152 |
+
length_scale: 0.5
|
| 153 |
+
- speaker: "安倍晋三"
|
| 154 |
+
sdp_ratio: 0.2
|
| 155 |
+
noise_scale: 0.6
|
| 156 |
+
noise_scale_w: 0.8
|
| 157 |
+
length_scale: 1.2
|
| 158 |
+
- # 模型的路径
|
| 159 |
+
model: ""
|
| 160 |
+
# 模型config.json的路径
|
| 161 |
+
config: ""
|
| 162 |
+
# 模型使用设备,若填写则会覆盖默认配置
|
| 163 |
+
device: "cpu"
|
| 164 |
+
# 模型默认使用的语言
|
| 165 |
+
language: "JP"
|
| 166 |
+
# 模型人物默认参数
|
| 167 |
+
# 不必填写所有人物,不填的使用默认值
|
| 168 |
+
speakers: [ ] # 也可以不填
|
| 169 |
+
|
| 170 |
+
# 百度翻译开放平台 api配置
|
| 171 |
+
# api接入文档 https://api.fanyi.baidu.com/doc/21
|
| 172 |
+
# 请不要在github等网站公开分享你的app id 与 key
|
| 173 |
+
translate:
|
| 174 |
+
# 你的APPID
|
| 175 |
+
"app_key": ""
|
| 176 |
+
# 你的密钥
|
| 177 |
+
"secret_key": ""
|