leesenx committed on
Commit
7746634
·
verified ·
1 Parent(s): ff4e672

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -48
app.py CHANGED
@@ -1,6 +1,7 @@
1
  #!/usr/bin/env python3
2
 
3
  import os
 
4
  import time
5
  import uuid
6
  from datetime import datetime
@@ -19,9 +20,14 @@ def MyPrint(s):
19
  print(f"{date_time}: {s}")
20
 
21
 
22
- title = "# 文字转语音 (TTS)"
 
 
 
 
 
23
 
24
- description = ""
25
 
26
  css = """
27
  .result {display:flex;flex-direction:column}
@@ -34,41 +40,49 @@ css = """
34
  def update_model_dropdown(language: str):
35
  if language in language_to_models:
36
  choices = language_to_models[language]
 
 
37
  return gr.Dropdown(
38
  choices=choices,
39
  value=choices[0],
40
  interactive=True,
41
- )
42
- raise ValueError(f"Unsupported language: {language}")
 
 
 
 
 
 
 
43
 
44
 
45
  def build_html_output(s: str, style: str = "result_item_success"):
46
  return f"""
47
- <div class='result'>
48
- <div class='result_item {style}'>
49
- {s}
50
- </div>
51
- </div>
52
- """
53
 
54
 
55
  def process(language: str, repo_id: str, text: str, sid: str, speed: float):
56
  max_len = 4000
57
- MyPrint(f"输入文本长度 {len(text)}: {text[:max_len]}. 说话人ID: {sid}, 语速: {speed}")
58
 
59
  if len(text) > max_len:
60
  MyPrint(f"文本过长!{len(text)}")
61
- info = """
62
- 为保证响应速度,请使用短文本进行测试。<br/>
63
- 如需处理长文本,请在本地运行本程序。<br/>
64
- 参考 https://k2-fsa.github.io/sherpa/onnx/
65
- """
66
  return None, build_html_output(info)
67
 
 
68
  if sid is None or str(sid).strip() == "":
69
  sid = 0
70
  else:
71
  sid = int(sid)
 
 
72
 
73
  tts = get_pretrained_model(repo_id, speed)
74
 
@@ -77,19 +91,18 @@ def process(language: str, repo_id: str, text: str, sid: str, speed: float):
77
  end = time.time()
78
 
79
  if len(audio.samples) == 0:
80
- raise ValueError(
81
- "语音生成出错,请查看上方错误信息。"
82
- )
83
 
84
  duration = len(audio.samples) / audio.sample_rate
85
  elapsed_seconds = end - start
86
  rtf = elapsed_seconds / duration
87
 
88
  info = f"""
89
- Wave duration : {duration:.3f} s <br/>
90
- Processing time: {elapsed_seconds:.3f} s <br/>
91
- RTF: {elapsed_seconds:.3f}/{duration:.3f} = {rtf:.3f} <br/>
92
- """
 
93
 
94
  MyPrint(info)
95
  MyPrint(f"\nrepo_id: {repo_id}\ntext: {text}\nsid: {sid}\nspeed: {speed}")
@@ -120,10 +133,32 @@ with demo:
120
  label="选择模型",
121
  value=language_to_models[language_choices[0]][0],
122
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  language_radio.change(
124
  update_model_dropdown,
125
  inputs=language_radio,
126
- outputs=model_dropdown,
 
 
 
 
 
127
  )
128
  with gr.Tabs():
129
  with gr.TabItem("输入文本"):
@@ -134,14 +169,6 @@ with demo:
134
  value="大家好,这是一个文字转语音的测试。",
135
  placeholder="请输入要转换为语音的文字",
136
  )
137
- input_sid = gr.Textbox(
138
- label="说话人编号",
139
- info="多说话人模型可用(编号从0开始),单说话人模型填0即可",
140
- lines=1,
141
- max_lines=1,
142
- value="0",
143
- placeholder="0",
144
- )
145
  input_speed = gr.Slider(
146
  minimum=0.1,
147
  maximum=10,
@@ -153,22 +180,20 @@ with demo:
153
  output_audio = gr.Audio(label="生成的语音")
154
  output_info = gr.HTML(label="信息")
155
 
156
- input_button.click(
157
- process,
158
- inputs=[
159
- language_radio,
160
- model_dropdown,
161
- input_text,
162
- input_sid,
163
- input_speed,
164
- ],
165
- outputs=[
166
- output_audio,
167
- output_info,
168
- ],
169
- )
170
-
171
- gr.Markdown(description)
172
 
173
 
174
  def download_espeak_ng_data():
 
1
  #!/usr/bin/env python3
2
 
3
  import os
4
+ import re
5
  import time
6
  import uuid
7
  from datetime import datetime
 
20
  print(f"{date_time}: {s}")
21
 
22
 
23
+ def get_num_speakers(repo_id: str) -> int:
24
+ m = re.search(r"\|(\d+)\s*speaker", repo_id)
25
+ if m:
26
+ return int(m.group(1))
27
+ return 1
28
+
29
 
30
+ title = "# 文字转语音 (TTS)"
31
 
32
  css = """
33
  .result {display:flex;flex-direction:column}
 
def update_model_dropdown(language: str):
    """Refresh the model dropdown and speaker widgets for a new language.

    Args:
        language: Key into ``language_to_models``.

    Returns:
        A 3-tuple ``(dropdown, sid_info, sid_textbox)``: a ``gr.Dropdown``
        listing the models of ``language`` with the first one selected, the
        speaker-info text for that first model, and a ``gr.Textbox`` update
        controlling the visibility of the speaker-id input.

    Raises:
        ValueError: If ``language`` is not a key of ``language_to_models``.
    """
    if language not in language_to_models:
        raise ValueError(f"不支持的语言: {language}")

    choices = language_to_models[language]

    # Reuse the helper that the model dropdown's own change handler uses, so
    # the speaker message and visibility rule live in exactly one place.
    sid_info, sid_box = update_sid_info(choices[0])

    dropdown = gr.Dropdown(
        choices=choices,
        value=choices[0],
        interactive=True,
    )
    return dropdown, sid_info, sid_box
51
+
52
+
def update_sid_info(repo_id: str):
    """Build the speaker-info text and speaker-id textbox update for a model.

    Args:
        repo_id: Model label passed to ``get_num_speakers``.

    Returns:
        A 2-tuple ``(sid_info, sid_textbox)``: the message describing how
        many speakers the model has, and a ``gr.Textbox`` update that shows
        the speaker-id input for multi-speaker models and hides it otherwise.
    """
    n = get_num_speakers(repo_id)
    if n > 1:
        return f"当前模型有 {n} 个说话人,编号范围 0~{n - 1}", gr.Textbox(visible=True)

    # Hiding the textbox alone keeps whatever id was typed for a previous
    # multi-speaker model, and `process` only clamps the id when the model
    # has more than one speaker. Reset the hidden field so a stale id is not
    # submitted for a single-speaker model.
    return "当前模型只有1个说话人", gr.Textbox(value="0", visible=False)
58
 
59
 
def build_html_output(s: str, style: str = "result_item_success"):
    """Wrap ``s`` in the nested result ``<div>`` markup.

    Args:
        s: Content placed inside the inner ``<div>``. It is inserted as-is
            (no escaping), so it may itself contain HTML such as ``<br/>``.
        style: Extra CSS class added next to ``result_item`` on the inner
            ``<div>``.

    Returns:
        The HTML fragment as a string.
    """
    html = f"""
    <div class='result'>
        <div class='result_item {style}'>
          {s}
        </div>
    </div>
    """
    return html
68
 
69
 
70
  def process(language: str, repo_id: str, text: str, sid: str, speed: float):
71
  max_len = 4000
72
+ MyPrint(f"输入文本长度 {len(text)}: {text[:max_len]}. 说话人编号: {sid}, 语速: {speed}")
73
 
74
  if len(text) > max_len:
75
  MyPrint(f"文本过长!{len(text)}")
76
+ info = "为保证响应速度,请使用短文本进行测试。如需处理长文本,请在本地运行。"
 
 
 
 
77
  return None, build_html_output(info)
78
 
79
+ n = get_num_speakers(repo_id)
80
  if sid is None or str(sid).strip() == "":
81
  sid = 0
82
  else:
83
  sid = int(sid)
84
+ if n > 1 and sid >= n:
85
+ sid = n - 1
86
 
87
  tts = get_pretrained_model(repo_id, speed)
88
 
 
91
  end = time.time()
92
 
93
  if len(audio.samples) == 0:
94
+ raise ValueError("语音生成出错,请查看上方错误信息。")
 
 
95
 
96
  duration = len(audio.samples) / audio.sample_rate
97
  elapsed_seconds = end - start
98
  rtf = elapsed_seconds / duration
99
 
100
  info = f"""
101
+ 音频时长: {duration:.3f} <br/>
102
+ 处理时间: {elapsed_seconds:.3f} <br/>
103
+ 实时率(RTF): {rtf:.3f}<br/>
104
+ 说话人编号: {sid}
105
+ """
106
 
107
  MyPrint(info)
108
  MyPrint(f"\nrepo_id: {repo_id}\ntext: {text}\nsid: {sid}\nspeed: {speed}")
 
133
  label="选择模型",
134
  value=language_to_models[language_choices[0]][0],
135
  )
136
+ first_model = language_to_models[language_choices[0]][0]
137
+ first_n = get_num_speakers(first_model)
138
+ sid_info_text = gr.Textbox(
139
+ value=f"当前模型有 {first_n} 个说话人,编号范围 0~{first_n - 1}" if first_n > 1 else "当前模型只有1个说话人",
140
+ label="说话人信息",
141
+ interactive=False,
142
+ visible=True,
143
+ )
144
+ input_sid = gr.Textbox(
145
+ label="说话人编号",
146
+ info="编号从0开始",
147
+ lines=1,
148
+ max_lines=1,
149
+ value="0",
150
+ placeholder="0",
151
+ visible=first_n > 1,
152
+ )
153
  language_radio.change(
154
  update_model_dropdown,
155
  inputs=language_radio,
156
+ outputs=[model_dropdown, sid_info_text, input_sid],
157
+ )
158
+ model_dropdown.change(
159
+ update_sid_info,
160
+ inputs=model_dropdown,
161
+ outputs=[sid_info_text, input_sid],
162
  )
163
  with gr.Tabs():
164
  with gr.TabItem("输入文本"):
 
169
  value="大家好,这是一个文字转语音的测试。",
170
  placeholder="请输入要转换为语音的文字",
171
  )
 
 
 
 
 
 
 
 
172
  input_speed = gr.Slider(
173
  minimum=0.1,
174
  maximum=10,
 
180
  output_audio = gr.Audio(label="生成的语音")
181
  output_info = gr.HTML(label="信息")
182
 
183
+ input_button.click(
184
+ process,
185
+ inputs=[
186
+ language_radio,
187
+ model_dropdown,
188
+ input_text,
189
+ input_sid,
190
+ input_speed,
191
+ ],
192
+ outputs=[
193
+ output_audio,
194
+ output_info,
195
+ ],
196
+ )
 
 
197
 
198
 
199
  def download_espeak_ng_data():