redstoneleo committed on
Commit
9a21e55
·
verified ·
1 Parent(s): a4a2966

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +218 -217
app.py CHANGED
@@ -1,217 +1,218 @@
1
- import gradio as gr
2
- import edge_tts
3
- import aiohttp
4
-
5
- import asyncio
6
- import tempfile, time
7
- from typing import TypedDict, List, Dict
8
-
9
-
10
-
11
-
12
- class VoiceTag(TypedDict, total=False):
13
- ContentCategories: List[str]
14
- VoicePersonalities: List[str]
15
-
16
-
17
- class VoiceMetadata(TypedDict, total=False):
18
- Name: str
19
- ShortName: str
20
- Gender: str
21
- Locale: str
22
- SuggestedCodec: str
23
- FriendlyName: str
24
- Status: str
25
- VoiceTag: VoiceTag
26
-
27
-
28
- async def list_voices() -> Dict[str, VoiceMetadata]:
29
- """
30
- Fetch available voices from Microsoft Edge TTS.
31
-
32
- Returns:
33
- Dict[str, VoiceMetadata]: A dictionary mapping display names
34
- (e.g. "en-US-JennyNeural - Female") to their corresponding in the form like { 'Name': 'Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoxiaoNeural)', 'ShortName': 'zh-CN-XiaoxiaoNeural', 'Gender': 'Female', 'Locale': 'zh-CN', 'SuggestedCodec': 'audio-24khz-48kbitrate-mono-mp3', 'FriendlyName': 'Microsoft Xiaoxiao Online (Natural) - Chinese (Mainland)', 'Status': 'GA', 'VoiceTag': { 'ContentCategories': ['News', 'Novel'], 'VoicePersonalities': ['Warm'] } }
35
- """
36
-
37
-
38
-
39
- priority_order = [
40
- "zh", # 中文优先
41
- "en-US", # 美式英语
42
- "en-GB", # 英式英语
43
- "en", # 其他英语
44
- "hi", # 印地语
45
- "es", # 西班牙语
46
- "ar", # 现代标准阿拉伯语
47
- "fr", # 法语
48
- "bn", # 孟加拉语
49
- "pt", # 葡萄牙语
50
- "ru", # 俄语
51
- "id", # 印尼语
52
- "ur", # 乌尔都语
53
- "de", # 德语
54
- "ja", # 日语
55
- "pcm", # 尼日利亚皮钦语
56
- "ar-EG", # 埃及阿拉伯语
57
- "mr", # 马拉地语
58
- "vi", # 越南语
59
- "te", # 泰卢固语
60
- "ha", # 豪萨语
61
- "tr", # 土耳其语
62
- "pa", # 西旁遮普语
63
- "sw", # 斯瓦希里语
64
- "fil", # 他加禄语
65
- "ta", # 泰米尔语
66
- "yue", # 粤语
67
- "wuu", # 吴语
68
- "fa", # 波斯语
69
- "ko", # 韩语
70
- "th", # 泰语
71
- "jv", # 爪哇语
72
- ]
73
-
74
- lang_priority = {lang: i for i, lang in enumerate(priority_order)}
75
-
76
- def get_priority(short_name: str) -> tuple[int, str]:
77
- """
78
- Return (priority_rank, short_name) for stable sorting.
79
- """
80
- for prefix, rank in lang_priority.items():
81
- if short_name.startswith(prefix):
82
- return (rank, short_name)
83
- return (len(priority_order), short_name) # 其他语言排最后,内部按 short_name
84
-
85
- n = 5
86
- for i in range(n):
87
- try:
88
- voices = await edge_tts.list_voices()
89
- break
90
- except Exception as e:
91
- time.sleep(1 * i)
92
- print("Retrying due to handshake error:", e)
93
- if i == n - 1:
94
- raise e
95
-
96
- # ✅ 先按优先级,再按 short_name 排序,保证同类不乱
97
- voices_sorted = sorted(voices, key=lambda v: get_priority(v["ShortName"]))
98
-
99
- return {f"{v['ShortName']} - {v['Gender']}": v for v in voices_sorted}
100
-
101
-
102
- async def text_to_speech(text: str, voice: str, rate: int = 0, pitch: int = 0) -> str:
103
- """
104
- Convert input text to speech using Microsoft Edge TTS.
105
-
106
- Args:
107
- text (str): The text to synthesize into speech.
108
- voice (str): The selected voice in the format "ShortName - Gender".
109
- rate (int, optional): Speech rate adjustment percentage. Default is 0.
110
- pitch (int, optional): Pitch adjustment in Hz. Default is 0.
111
-
112
- Returns:
113
- str: Path to the generated MP3 file.
114
- """
115
-
116
- voice_short_name = voice.split(" - ")[0]
117
- rate_str = f"{rate:+d}%"
118
- pitch_str = f"{pitch:+d}Hz"
119
-
120
- n = 5
121
- for i in range(n):
122
- try:
123
- communicate = edge_tts.Communicate(
124
- text, voice_short_name, rate=rate_str, pitch=pitch_str
125
- )
126
- break
127
- except aiohttp.client_exceptions.WSServerHandshakeError as e:
128
- time.sleep(1 * i)
129
- print("Retrying due to handshake error:", e)
130
- if i == n - 1:
131
- raise e
132
-
133
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
134
- tmp_path = tmp_file.name
135
- await communicate.save(tmp_path)
136
-
137
- return tmp_path#后面output到gr.Audio上,通过程序交互的时候gradio会将其转化为url,很神奇!
138
-
139
-
140
- i18n = gr.I18n(
141
- en={"pageTitle": "# 🎙️ Microsoft Online Edge TTS & MCP Server",
142
- "generateSpeech": "Generate Speech",
143
- 'description':"""Convert text to speech using the free [Microsoft Edge TTS](https://github.com/rany2/edge-tts), API and MCP are available. Made by [MathJoy](https://www.cnblogs.com/imath).""",
144
- "selectVoice":"Select Voice",
145
- "speechRateAdjustment":"Speech Rate Adjustment (%)",
146
- "pitchAdjustment":"Pitch Adjustment (Hz)",
147
- "inputText":"Input Text"
148
- },
149
- zh={"pageTitle": "# 🎙️ 微软线上 Edge TTS & MCP",
150
- "generateSpeech": "生成语音",
151
- 'description':"""文本语音合成,基于免费的[Microsoft Edge TTS](https://github.com/rany2/edge-tts),由[MathJoy](https://www.cnblogs.com/imath)精心制作,也可通过API或MCP来使用。""",
152
- "selectVoice":"选择",
153
- "speechRateAdjustment":"语速调整 (%)",
154
- "pitchAdjustment":"音量调整 (Hz)",
155
- "inputText":"输入文字"
156
- }
157
- )
158
-
159
- async def create_UI():
160
-
161
-
162
- voices = await list_voices()
163
-
164
- with gr.Blocks(title="Microsoft Edge TTS & MCP",analytics_enabled=False) as UI:
165
- gr.api(#默认只在fn处暴露工具,这里主动暴露这个tool
166
- list_voices
167
- )
168
-
169
- gr.Markdown(i18n("pageTitle"))
170
-
171
- with gr.Row():
172
- with gr.Column(scale=1):
173
- gr.Markdown(i18n("description"))
174
-
175
- with gr.Row():
176
- with gr.Column():
177
- text_input = gr.Textbox(label=i18n("inputText"), lines=5,value='Your text here')
178
-
179
- voiceList=list(voices.keys())
180
- voice_dropdown = gr.Dropdown(
181
- choices=voiceList,
182
- label=i18n("selectVoice"),
183
- value=voiceList[0])
184
-
185
- rate_slider = gr.Slider(
186
- minimum=-50, maximum=50, value=0,
187
- label=i18n("speechRateAdjustment"), step=1
188
- )
189
-
190
- pitch_slider = gr.Slider(
191
- minimum=-20, maximum=20, value=0,
192
- label=i18n("pitchAdjustment"), step=1
193
- )
194
-
195
- generate_btn = gr.Button(i18n("generateSpeech"), variant="primary")
196
- audio_output = gr.Audio(label="Generated Audio", type="filepath",autoplay=True)
197
-
198
- generate_btn.click(
199
- fn=text_to_speech,
200
- inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
201
- outputs=[audio_output]
202
- )
203
-
204
- return UI
205
-
206
-
207
- async def main():
208
- UI = await create_UI()
209
- UI.queue(default_concurrency_limit=50)
210
- UI.launch(
211
- # show_api=False,
212
- i18n=i18n,
213
- mcp_server=True # ✅ make this an MCP server
214
- )
215
-
216
- if __name__ == "__main__":
217
- asyncio.run(main())
 
 
1
+ import gradio as gr
2
+ import edge_tts
3
+ import aiohttp
4
+
5
+ import asyncio
6
+ import tempfile, time
7
+ from typing import TypedDict, List, Dict
8
+ from pathlib import Path
9
+
10
+
11
+
12
# Optional tag block attached to a voice; every key may be absent (total=False).
VoiceTag = TypedDict(
    "VoiceTag",
    {
        "ContentCategories": List[str],   # e.g. ['News', 'Novel']
        "VoicePersonalities": List[str],  # e.g. ['Warm']
    },
    total=False,
)
15
+
16
+
17
# Shape of one voice record as handled by this app; every key is optional
# (total=False). The functional form is used so the "VoiceTag" key and the
# VoiceTag type do not share a name inside a class body.
VoiceMetadata = TypedDict(
    "VoiceMetadata",
    {
        "Name": str,
        "ShortName": str,
        "Gender": str,
        "Locale": str,
        "SuggestedCodec": str,
        "FriendlyName": str,
        "Status": str,
        "VoiceTag": VoiceTag,
    },
    total=False,
)
26
+
27
+
28
async def list_voices() -> Dict[str, VoiceMetadata]:
    """
    Fetch available voices from Microsoft Edge TTS, sorted by language priority.

    Voices are ordered by the rank of their language/locale in an internal
    priority list (Chinese first, then US, UK and other English, ...). Voices
    whose language is not in the list come last. Within one rank, voices are
    ordered by their ShortName.

    Returns:
        Dict[str, VoiceMetadata]: A dictionary mapping display names
        (e.g. "en-US-JennyNeural - Female") to the corresponding voice
        metadata, in a form like
        { 'Name': 'Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoxiaoNeural)',
          'ShortName': 'zh-CN-XiaoxiaoNeural', 'Gender': 'Female', 'Locale': 'zh-CN',
          'SuggestedCodec': 'audio-24khz-48kbitrate-mono-mp3',
          'FriendlyName': 'Microsoft Xiaoxiao Online (Natural) - Chinese (Mainland)',
          'Status': 'GA',
          'VoiceTag': { 'ContentCategories': ['News', 'Novel'], 'VoicePersonalities': ['Warm'] } }

    Raises:
        Exception: Re-raises the error of the last attempt when the voice list
            still cannot be fetched after 5 attempts.
    """
    # Language / locale codes in display order; earlier entries sort first.
    priority_order = [
        "zh",     # Chinese first
        "en-US",  # American English
        "en-GB",  # British English
        "en",     # other English
        "hi",     # Hindi
        "es",     # Spanish
        "ar",     # Modern Standard Arabic
        "fr",     # French
        "bn",     # Bengali
        "pt",     # Portuguese
        "ru",     # Russian
        "id",     # Indonesian
        "ur",     # Urdu
        "de",     # German
        "ja",     # Japanese
        "pcm",    # Nigerian Pidgin
        "ar-EG",  # Egyptian Arabic
        "mr",     # Marathi
        "vi",     # Vietnamese
        "te",     # Telugu
        "ha",     # Hausa
        "tr",     # Turkish
        "pa",     # Western Punjabi
        "sw",     # Swahili
        "fil",    # Tagalog
        "ta",     # Tamil
        "yue",    # Cantonese
        "wuu",    # Wu Chinese
        "fa",     # Persian
        "ko",     # Korean
        "th",     # Thai
        "jv",     # Javanese
    ]

    lang_priority = {lang: i for i, lang in enumerate(priority_order)}
    fallback_rank = len(priority_order)

    def get_priority(short_name: str) -> tuple[int, str]:
        """
        Return (priority_rank, short_name) as the sort key for a voice.
        """
        # Match whole "-"-separated segments, most specific first, so that
        # "ar-EG-..." resolves to the "ar-EG" entry instead of being captured
        # by the shorter "ar", and a code never matches a longer, unrelated one.
        segments = short_name.split("-")
        for end in range(len(segments), 0, -1):
            rank = lang_priority.get("-".join(segments[:end]))
            if rank is not None:
                return (rank, short_name)
        # Unlisted languages go last, ordered among themselves by short_name.
        return (fallback_rank, short_name)

    max_attempts = 5
    for attempt in range(max_attempts):
        try:
            voices = await edge_tts.list_voices()
            break
        except Exception as e:
            # Out of attempts: propagate immediately, without a pointless wait.
            if attempt == max_attempts - 1:
                raise
            print("Retrying due to handshake error:", e)
            # Linear back-off (0s, 1s, 2s, ...). asyncio.sleep keeps the event
            # loop free for other requests, unlike time.sleep.
            await asyncio.sleep(attempt)

    # Sort by priority rank first, then by short_name, so each group stays ordered.
    voices_sorted = sorted(voices, key=lambda v: get_priority(v["ShortName"]))

    return {f"{v['ShortName']} - {v['Gender']}": v for v in voices_sorted}
100
+
101
+
102
async def text_to_speech(text: str, voice: str, rate: int = 0, pitch: int = 0) -> str:
    """
    Convert input text to speech using Microsoft Edge TTS.

    Args:
        text (str): The text to synthesize into speech.
        voice (str): The selected voice in the format "ShortName - Gender".
        rate (int, optional): Speech rate adjustment percentage. Default is 0.
        pitch (int, optional): Pitch adjustment in Hz. Default is 0.

    Returns:
        str: Path to the generated MP3 file.

    Raises:
        aiohttp.WSServerHandshakeError: If the websocket handshake with the
            service still fails after 5 attempts.
    """
    voice_short_name = voice.split(" - ")[0]
    # The "+d" format spec only accepts integers, while sliders and API
    # clients may deliver floats (e.g. 10.0), so normalise first.
    rate_str = f"{int(round(rate)):+d}%"
    pitch_str = f"{int(round(pitch)):+d}Hz"

    # Reserve a unique output path. delete=False keeps the file after the
    # handle is closed so it can be written below and served afterwards.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    max_attempts = 5
    try:
        for attempt in range(max_attempts):
            try:
                # The handshake happens while streaming/saving, not when the
                # object is built, so the whole synthesis sits inside the
                # retry. A fresh Communicate is created per attempt because an
                # instance may not be reusable after a failed stream.
                communicate = edge_tts.Communicate(
                    text, voice_short_name, rate=rate_str, pitch=pitch_str
                )
                await communicate.save(tmp_path)
                break
            except aiohttp.WSServerHandshakeError as e:
                if attempt == max_attempts - 1:
                    raise
                print("Retrying due to handshake error:", e)
                # Linear back-off without blocking the event loop.
                await asyncio.sleep(attempt)
    except BaseException:
        # Do not leave an empty or partial temp file behind on failure.
        Path(tmp_path).unlink(missing_ok=True)
        raise

    path = Path(tmp_path)
    # This path is later output to gr.Audio; when the app is used
    # programmatically, gradio converts it into a URL.
    return path.as_posix()
139
+
140
+
141
# UI translations, looked up by key through i18n("<key>") when components are built.
# Fixes in the zh table: the description regains the missing character
# ("文本语音合成", text-to-speech) and the pitch label now says pitch ("音调")
# rather than volume ("音量").
i18n = gr.I18n(
    en={
        "pageTitle": "# 🎙️ Microsoft Online Edge TTS & MCP Server",
        "generateSpeech": "Generate Speech",
        "description": """Convert text to speech using the free [Microsoft Edge TTS](https://github.com/rany2/edge-tts), API and MCP are available. Made by [MathJoy](https://www.cnblogs.com/imath).""",
        "selectVoice": "Select Voice",
        "speechRateAdjustment": "Speech Rate Adjustment (%)",
        "pitchAdjustment": "Pitch Adjustment (Hz)",
        "inputText": "Input Text",
    },
    zh={
        "pageTitle": "# 🎙️ 微软线上 Edge TTS & MCP",
        "generateSpeech": "生成语音",
        "description": """文本语音合成,基于免费的[Microsoft Edge TTS](https://github.com/rany2/edge-tts),由[MathJoy](https://www.cnblogs.com/imath)精心制作,也可通过API或MCP来使用。""",
        "selectVoice": "选择音色",
        "speechRateAdjustment": "语速调整 (%)",
        "pitchAdjustment": "音调调整 (Hz)",
        "inputText": "输入文字",
    },
)
159
+
160
async def create_UI():
    """
    Build the Gradio Blocks interface for the TTS app.

    Fetches the voice list once, up front, to populate the voice dropdown,
    then wires the "generate" button to text_to_speech.

    Returns:
        The constructed gr.Blocks instance (not yet launched).
    """
    voices = await list_voices()

    with gr.Blocks(title="Microsoft Edge TTS & MCP", analytics_enabled=False) as UI:
        # By default only functions bound to an event (fn=...) are exposed as
        # tools; register list_voices explicitly so it is exposed as well.
        gr.api(
            list_voices
        )

        gr.Markdown(i18n("pageTitle"))

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown(i18n("description"))

        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(label=i18n("inputText"), lines=5, value='Your text here')

                voiceList = list(voices.keys())
                voice_dropdown = gr.Dropdown(
                    choices=voiceList,
                    label=i18n("selectVoice"),
                    # Preselect the highest-priority voice; tolerate an empty
                    # voice list instead of failing with IndexError.
                    value=voiceList[0] if voiceList else None,
                )

                rate_slider = gr.Slider(
                    minimum=-50, maximum=50, value=0,
                    label=i18n("speechRateAdjustment"), step=1
                )

                pitch_slider = gr.Slider(
                    minimum=-20, maximum=20, value=0,
                    label=i18n("pitchAdjustment"), step=1
                )

                generate_btn = gr.Button(i18n("generateSpeech"), variant="primary")
                audio_output = gr.Audio(label="Generated Audio", type="filepath", autoplay=True)

        generate_btn.click(
            fn=text_to_speech,
            inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
            outputs=[audio_output]
        )

    return UI
206
+
207
+
208
async def main():
    """Build the interface, enable request queuing and start the server."""
    app = await create_UI()
    app.queue(default_concurrency_limit=50)

    launch_options = {
        "i18n": i18n,
        "mcp_server": True,  # also serve this app as an MCP server
    }
    app.launch(**launch_options)


# Script entry point: run main() to completion when executed directly.
if __name__ == "__main__":
    asyncio.run(main())