Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modelscope.pipelines import pipeline
|
| 2 |
+
from modelscope.utils.constant import Tasks
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import os
|
| 5 |
+
import shutil
|
| 6 |
+
import os.path as osp
|
| 7 |
+
from modelscope.tools import run_auto_label
|
| 8 |
+
from modelscope.metainfo import Trainers
|
| 9 |
+
from modelscope.trainers import build_trainer
|
| 10 |
+
from modelscope.utils.audio.audio_utils import TtsTrainType
|
| 11 |
+
from modelscope.models.audio.tts import SambertHifigan
|
| 12 |
+
import IPython.display as ipd
|
| 13 |
+
|
| 14 |
+
def launch_training_task(*audio_lst):
    """Finetune the personal SAM-BERT/HiFi-GAN TTS model on uploaded clips.

    Parameters
    ----------
    *audio_lst : str | None
        Filepaths of the recorded/uploaded wav clips, one per ``gr.Audio``
        widget in the UI. Widgets left empty arrive as ``None``.

    Returns
    -------
    str
        Status message ("已训练完成") shown in the training-result textbox.
    """
    # exist_ok=True: the original makedirs raised FileExistsError whenever
    # the button was clicked a second time in the same container.
    os.makedirs("/tmp/test_wavs", exist_ok=True)
    os.makedirs("/tmp/output_training_data", exist_ok=True)
    os.makedirs("/tmp/pretrain_work_dir", exist_ok=True)

    # Copy clips into the fixed naming scheme the auto-labeler expects
    # (01_000000.wav, 01_000001.wav, ...). f"{num:06d}" reproduces the
    # original two-branch padding exactly for 0-99 widgets. Empty widgets
    # (None) are skipped instead of crashing shutil.copy.
    for num, clip in enumerate(audio_lst):
        if clip is None:
            continue
        shutil.copy(clip, f"/tmp/test_wavs/01_{num:06d}.wav")

    input_wav = "/tmp/test_wavs/"
    output_data = "/tmp/output_training_data/"
    # Auto-label (transcribe/align) the clips to build the training dataset.
    ret, report = run_auto_label(input_wav=input_wav,
                                 work_dir=output_data,
                                 resource_revision="v1.0.7")

    pretrained_model_id = 'damo/speech_personal_sambert-hifigan_nsf_tts_zh-cn_pretrain_16k'
    dataset_id = "/tmp/output_training_data/"
    pretrain_work_dir = "/tmp/pretrain_work_dir/"

    # Only the acoustic model (sambert) is finetuned here; the vocoder
    # checkpoints from the base model are reused at inference time.
    train_info = {
        TtsTrainType.TRAIN_TYPE_SAMBERT: {
            'train_steps': 202,           # total training steps
            'save_interval_steps': 200,   # checkpoint every N steps
            'log_interval': 10            # print a log line every N steps
        }
    }

    kwargs = dict(
        model=pretrained_model_id,    # base model to finetune
        model_revision="v1.0.6",
        work_dir=pretrain_work_dir,   # scratch / output directory
        train_dataset=dataset_id,     # path of the auto-labeled dataset
        train_type=train_info         # which sub-models to train, and how
    )

    trainer = build_trainer(Trainers.speech_kantts_trainer, default_args=kwargs)
    trainer.train()

    return "已训练完成"
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def greet(audio):
    """Round trip: ASR -> GPT-3 text continuation -> personal-voice TTS.

    Parameters
    ----------
    audio : str
        Filepath of the user's recorded or uploaded audio.

    Returns
    -------
    tuple
        (recognized text, IPython audio widget with the synthesized reply
        at 16 kHz).
    """
    print(audio)
    # Speech recognition (Paraformer, 16k Chinese).
    inference_pipeline = pipeline(
        task=Tasks.auto_speech_recognition,
        model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch')
    rec_result = inference_pipeline(audio_in=audio)
    # "|" is appended as a prompt separator so the echoed prompt can be
    # stripped from the generated continuation below.
    input_gpt3 = rec_result['text'] + "|"

    text_generation_zh = pipeline(
        Tasks.text_generation,
        model='damo/nlp_gpt3_text-generation_chinese-base')
    result_gpt = text_generation_zh(input_gpt3)

    # BUG FIX: the pipeline returns a dict (cf. rec_result['text'] above),
    # so .find() must be called on its 'text' field — calling it on the
    # dict itself raised AttributeError.
    gen_text = result_gpt['text']
    input_pttp = gen_text[gen_text.find("|") + 1:]

    model_dir = os.path.abspath("/tmp/pretrain_work_dir")

    # Point inference at the checkpoints produced by launch_training_task:
    # finetuned AM under tmp_am/, base vocoder under orig_model/.
    custom_infer_abs = {
        'voice_name':
            'F7',
        'am_ckpt':
            os.path.join(model_dir, 'tmp_am', 'ckpt'),
        'am_config':
            os.path.join(model_dir, 'tmp_am', 'config.yaml'),
        'voc_ckpt':
            os.path.join(model_dir, 'orig_model', 'basemodel_16k', 'hifigan', 'ckpt'),
        'voc_config':
            os.path.join(model_dir, 'orig_model', 'basemodel_16k', 'hifigan',
                         'config.yaml'),
        'audio_config':
            os.path.join(model_dir, 'data', 'audio_config.yaml'),
        'se_file':
            os.path.join(model_dir, 'data', 'se', 'se.npy')
    }
    kwargs = {'custom_ckpt': custom_infer_abs}

    model_id = SambertHifigan(os.path.join(model_dir, "orig_model"), **kwargs)

    inference = pipeline(task=Tasks.text_to_speech, model=model_id)
    output = inference(input=input_pttp)

    return rec_result['text'], ipd.Audio(output["output_wav"], rate=16000)
|
| 93 |
+
|
| 94 |
+
with gr.Blocks() as demo:
    # Transcripts for the 20 recording prompts, grouped into four columns.
    # NOTE(review): prompt 14 contained a mojibake sequence ("一��找");
    # restored as "一直找" from context — confirm against the original text.
    prompt_columns = [
        [
            "1. 希望我们大家都能像他一样",
            "2. 不行, 他想了一下, 我不能这样对国王说, 这是在撒谎",
            "3. 但他们非常和气地问她说, 你叫什么名字",
            "4. 鸭子心想, 我必须去拿回我的软糖豆",
            "5. 小朋友, 你们不要再欺负它了",
        ],
        [
            "6. 可是, 小黄鸭并不怕他们",
            "7. 然后, 他们一起走了很长一段时间",
            "8. 突然, 墙壁后面传来一阵声音",
            "9. 结果盘子掉在地上, 打得粉碎",
            "10. 四个小伙伴很开心, 一起感谢小松鼠的帮助",
        ],
        [
            "11. 不过, 当他看到拇指姑娘的时候, 他马上就变得高兴起来",
            "12. 从此以后, 他过上了幸福的生活",
            "13. 老山羊最后伤心地, 哭着走了出去",
            "14. 而且准备一直找下去, 直到他走不动为止",
            "15. 海马先生轻轻游过大海",
        ],
        [
            "16. 一起高高兴兴地, 回到了他们的爸爸妈妈身边",
            "17. 艾丽莎很小不能去上学, 但她有一个非常贵重精美的画册",
            "18. 狮子还是够不着, 它叫来了狐狸",
            "19. 姑娘坐到国王的马车上, 和国王一起回到宫中",
            "20. 温妮大叫了起来, 现在我们该怎么回家呀",
        ],
    ]

    # Build the recording widgets column by column; widget creation order
    # (and therefore the order of inputs to launch_training_task) is
    # identical to the original hand-unrolled version.
    audio_list = []
    with gr.Row():
        for labels in prompt_columns:
            with gr.Column(scale=1):
                audio_list.extend(gr.Audio(label=text) for text in labels)

    train_res = gr.Textbox(label="训练结果", value="当前无训练任务")

    training_button = gr.Button("开始训练")

    training_button.click(launch_training_task, inputs=audio_list, outputs=train_res)

    gr.Markdown("# 快来和模型对话吧")

    # Two input modes share the same greet() pipeline.
    with gr.Tab("麦克风"):
        in_audio = gr.Audio(source='microphone', type='filepath')
        button = gr.Button("开始运行", variant="primary")
        output_txt = gr.Textbox(label='文本')
        output_label = gr.Audio(label='音频')
        button.click(greet,
                     inputs=in_audio,
                     outputs=[output_txt, output_label])

    with gr.Tab("上传音频"):
        in_audio = gr.Audio(type='filepath')
        button = gr.Button("开始运行", variant="primary")
        output_txt = gr.Textbox(label='文本')
        output_label = gr.Audio(label='音频')
        button.click(greet,
                     inputs=in_audio,
                     outputs=[output_txt, output_label])

gr.close_all()
demo.queue(concurrency_count=5)
demo.launch()
|