roninpb committed on
Commit
9bd9adc
·
1 Parent(s): a7fdd62

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -0
app.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modelscope.pipelines import pipeline
2
+ from modelscope.utils.constant import Tasks
3
+ import gradio as gr
4
+ import os
5
+ import shutil
6
+ import os.path as osp
7
+ from modelscope.tools import run_auto_label
8
+ from modelscope.metainfo import Trainers
9
+ from modelscope.trainers import build_trainer
10
+ from modelscope.utils.audio.audio_utils import TtsTrainType
11
+ from modelscope.models.audio.tts import SambertHifigan
12
+ import IPython.display as ipd
13
+
14
+ def launch_training_task(*audio_lst):
15
+ os.makedirs("/tmp/test_wavs")
16
+ os.makedirs("/tmp/output_training_data")
17
+ os.makedirs("/tmp/pretrain_work_dir")
18
+ for num in range(len(audio_lst)):
19
+ if num<10:
20
+ shutil.copy(audio_lst[num], "/tmp/test_wavs/01_00000"+str(num)+".wav")
21
+ else:
22
+ shutil.copy(audio_lst[num], "/tmp/test_wavs/01_0000"+str(num)+".wav")
23
+
24
+ input_wav = "/tmp/test_wavs/"
25
+ output_data = "/tmp/output_training_data/"
26
+ ret, report = run_auto_label(input_wav=input_wav, work_dir=output_data, resource_revision="v1.0.7")
27
+
28
+ pretrained_model_id = 'damo/speech_personal_sambert-hifigan_nsf_tts_zh-cn_pretrain_16k'
29
+ dataset_id = "/tmp/output_training_data/"
30
+ pretrain_work_dir = "/tmp/pretrain_work_dir/"
31
+
32
+ train_info = {
33
+ TtsTrainType.TRAIN_TYPE_SAMBERT: { # 配置训练AM(sambert)模型
34
+ 'train_steps': 202, # 训练多少个step
35
+ 'save_interval_steps': 200, # 每训练多少个step保存一次checkpoint
36
+ 'log_interval': 10 # 每训练多少个step打印一次训练日志
37
+ }
38
+ }
39
+
40
+ kwargs = dict(
41
+ model=pretrained_model_id, # 指定要finetune的模型
42
+ model_revision = "v1.0.6",
43
+ work_dir=pretrain_work_dir, # 指定临时工作目录
44
+ train_dataset=dataset_id, # 指定数据集id
45
+ train_type=train_info # 指定要训练类型及参数
46
+ )
47
+
48
+ trainer = build_trainer(Trainers.speech_kantts_trainer,default_args=kwargs)
49
+
50
+ trainer.train()
51
+
52
+ return "已训练完成"
53
+
54
+
55
+ def greet(audio):
56
+ print(audio)
57
+ inference_pipeline = pipeline(task=Tasks.auto_speech_recognition, model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch')
58
+ rec_result = inference_pipeline(audio_in=audio)
59
+ input_gpt3 = rec_result['text']+"|"
60
+
61
+ text_generation_zh = pipeline(Tasks.text_generation, model='damo/nlp_gpt3_text-generation_chinese-base')
62
+ result_gpt = text_generation_zh(input_gpt3)
63
+
64
+ input_pttp = result_gpt[result_gpt.find("|")+1:]
65
+
66
+ model_dir = os.path.abspath("/tmp/pretrain_work_dir")
67
+
68
+ custom_infer_abs = {
69
+ 'voice_name':
70
+ 'F7',
71
+ 'am_ckpt':
72
+ os.path.join(model_dir, 'tmp_am', 'ckpt'),
73
+ 'am_config':
74
+ os.path.join(model_dir, 'tmp_am', 'config.yaml'),
75
+ 'voc_ckpt':
76
+ os.path.join(model_dir, 'orig_model', 'basemodel_16k', 'hifigan', 'ckpt'),
77
+ 'voc_config':
78
+ os.path.join(model_dir, 'orig_model', 'basemodel_16k', 'hifigan',
79
+ 'config.yaml'),
80
+ 'audio_config':
81
+ os.path.join(model_dir, 'data', 'audio_config.yaml'),
82
+ 'se_file':
83
+ os.path.join(model_dir, 'data', 'se', 'se.npy')
84
+ }
85
+ kwargs = {'custom_ckpt': custom_infer_abs}
86
+
87
+ model_id = SambertHifigan(os.path.join(model_dir, "orig_model"), **kwargs)
88
+
89
+ inference = pipeline(task=Tasks.text_to_speech, model=model_id)
90
+ output = inference(input=input_pttp)
91
+
92
+ return rec_result['text'],ipd.Audio(output["output_wav"], rate=16000)
93
+
94
+ with gr.Blocks() as demo:
95
+ with gr.Row():
96
+ with gr.Column(scale=1):
97
+ audio_lst1 = [
98
+ gr.Audio(label="1. 希望我们大家都能像他一样"),
99
+ gr.Audio(label="2. 不行, 他想了一下, 我不能这样对国王说, 这是在撒谎"),
100
+ gr.Audio(label="3. 但他们非常和气地问她说, 你叫什么名字"),
101
+ gr.Audio(label="4. 鸭子心想, 我必须去拿回我的软糖豆"),
102
+ gr.Audio(label="5. 小朋友, 你们不要再欺负它了"),
103
+ ]
104
+ with gr.Column(scale=1):
105
+ audio_lst2 = [
106
+ gr.Audio(label="6. 可是, 小黄鸭并不怕他们"),
107
+ gr.Audio(label="7. 然后, 他们一起走了很长一段时间"),
108
+ gr.Audio(label="8. 突然, 墙壁后面传来一阵声音"),
109
+ gr.Audio(label="9. 结果盘子掉在地上, 打得粉碎"),
110
+ gr.Audio(label="10. 四个小伙伴很开心, 一起感谢小松鼠的帮助"),
111
+ ]
112
+ with gr.Column(scale=1):
113
+ audio_lst3 = [
114
+ gr.Audio(label="11. 不过, 当他看到拇指姑娘的时候, 他马上就变得高兴起来"),
115
+ gr.Audio(label="12. 从此以后, 他过上了幸福的生活"),
116
+ gr.Audio(label="13. 老山羊最后伤心地, 哭着走了出去"),
117
+ gr.Audio(label="14. 而且准备一��找下去, 直到他走不动为止"),
118
+ gr.Audio(label="15. 海马先生轻轻游过大海"),
119
+ ]
120
+ with gr.Column(scale=1):
121
+ audio_lst4 = [
122
+ gr.Audio(label="16. 一起高高兴兴地, 回到了他们的爸爸妈妈身边"),
123
+ gr.Audio(label="17. 艾丽莎很小不能去上学, 但她有一个非常贵重精美的画册"),
124
+ gr.Audio(label="18. 狮子还是够不着, 它叫来了狐狸"),
125
+ gr.Audio(label="19. 姑娘坐到国王的马车上, 和国王一起回到宫中"),
126
+ gr.Audio(label="20. 温妮大叫了起来, 现在我们该怎么回家呀"),
127
+ ]
128
+ audio_list = audio_lst1 + audio_lst2 + audio_lst3 + audio_lst4
129
+
130
+ train_res = gr.Textbox(label="训练结果", value="当前无训练任务")
131
+
132
+ training_button = gr.Button("开始训练")
133
+
134
+ training_button.click(launch_training_task,inputs=audio_list,outputs=train_res)
135
+
136
+
137
+ gr.Markdown("# 快来和模型对话吧")
138
+
139
+ with gr.Tab("麦克风"):
140
+ in_audio = gr.Audio(source='microphone', type='filepath')
141
+ button = gr.Button("开始运行", variant="primary")
142
+ output_txt = gr.Textbox(label='文本')
143
+ output_label = gr.Audio(label='音频')
144
+ button.click(greet,
145
+ inputs=in_audio,
146
+ outputs=[output_txt, output_label])
147
+
148
+ with gr.Tab("上传音频"):
149
+ in_audio = gr.Audio(type='filepath')
150
+ button = gr.Button("开始运行", variant="primary")
151
+ output_txt = gr.Textbox(label='文本')
152
+ output_label = gr.Audio(label='音频')
153
+ button.click(greet,
154
+ inputs=in_audio,
155
+ outputs=[output_txt, output_label])
156
+
157
+ gr.close_all()
158
+ demo.queue(concurrency_count=5)
159
+ demo.launch()