Upload 8 files
Browse files- app.py +60 -0
- check_hf_space.py +48 -0
- gradio_app.py +111 -0
- requirements.txt +5 -0
- test_edge_tts.py +44 -0
- test_output.mp3 +0 -0
- test_tts.py +46 -0
app.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException
|
| 2 |
+
from fastapi.responses import StreamingResponse
|
| 3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
+
from pydantic import BaseModel
|
| 5 |
+
import edge_tts
|
| 6 |
+
import asyncio
|
| 7 |
+
|
| 8 |
+
# Application instance; title, description and version are passed to FastAPI
# for the generated API documentation.
app = FastAPI(title="Edge TTS API", description="基于Edge TTS的文本转语音API", version="1.0")

# CORS: every origin may call this API. Credentials are disabled because a
# wildcard origin must not be combined with credentialed requests, and none of
# the endpoints in this file read cookies or authorization headers.
# In production, restrict allow_origins to the specific trusted domains.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],  # allow every HTTP method
    allow_headers=["*"],  # allow every request header
)
|
| 18 |
+
|
| 19 |
+
# Request body of the /tts endpoint. Documented with comments rather than a
# docstring so the generated schema stays exactly as before.
class TTSRequest(BaseModel):
    # Text to synthesize (required).
    text: str

    # Voice name handed to edge_tts.Communicate.
    voice: str = "zh-CN-YunxiNeural"

    # Speaking-rate adjustment, forwarded as the ``rate`` argument.
    rate: str = "+0%"

    # Volume adjustment, forwarded as the ``volume`` argument.
    volume: str = "+0%"
|
| 24 |
+
|
| 25 |
+
# POST /tts: synthesize request.text with Edge TTS and return it as audio/mpeg.
#
# The audio is buffered completely before the response is built, so a failure
# during synthesis is still reported as an HTTP error rather than a truncated
# 200 response.
#
# Raises HTTPException:
#   400 - the text is empty or whitespace only
#   500 - Edge TTS raised an error or produced no audio data
#
# (Kept as comments, not a docstring, so the OpenAPI description is unchanged.)
@app.post("/tts", response_class=StreamingResponse, summary="文本转语音")
async def text_to_speech(request: TTSRequest):
    # Reject blank input up front instead of spending a round trip on it.
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="text must not be empty")

    try:
        communicate = edge_tts.Communicate(
            request.text, request.voice, rate=request.rate, volume=request.volume
        )
        # Gather the chunks in a list and join once; repeated ``bytes +=``
        # copies the whole buffer for every chunk.
        audio_chunks = [
            chunk["data"]
            async for chunk in communicate.stream()
            if chunk["type"] == "audio"
        ]
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    audio_data = b"".join(audio_chunks)
    if not audio_data:
        # An empty body with status 200 would look like a successful synthesis.
        raise HTTPException(status_code=500, detail="no audio data received")

    return StreamingResponse(iter([audio_data]), media_type="audio/mpeg")
|
| 40 |
+
|
| 41 |
+
# GET /voices: list the voices returned by edge_tts.list_voices(), reduced to
# short name, friendly name, gender and locale. Any failure becomes HTTP 500.
@app.get("/voices", summary="获取可用语音列表")
async def get_voices():
    try:
        available = await edge_tts.list_voices()
        summaries = []
        for entry in available:
            short_name = entry.get("ShortName", "")
            summaries.append({
                "short_name": short_name,
                # Fall back to the short name when no friendly name is given.
                "friendly_name": entry.get("FriendlyName", short_name),
                "gender": entry.get("Gender", ""),
                "locale": entry.get("Locale", ""),
            })
        return summaries
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
| 53 |
+
|
| 54 |
+
# GET /: landing endpoint returning a welcome message and the docs path.
@app.get("/", summary="首页")
def read_root():
    welcome = {
        "message": "欢迎使用Edge TTS API",
        "docs": "/docs",
    }
    return welcome
|
| 57 |
+
|
| 58 |
+
if __name__ == "__main__":
    # uvicorn is imported here so it is only required when run as a script.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)
|
check_hf_space.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
|
| 3 |
+
def check_space_files(space_id="chaore/ttsedge", timeout=10):
    """Fetch and print the file listing of a Hugging Face Space.

    Args:
        space_id: ``owner/name`` of the Space to inspect.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        True when the listing was fetched and printed, False on a non-200
        response or on any error.
    """
    print("检查Hugging Face Space的文件结构...")
    url = f"https://huggingface.co/api/spaces/{space_id}/files"

    try:
        # Without a timeout, requests can wait indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print(f"获取文件列表失败: {response.status_code}")
            print(f"响应内容: {response.text}")
            return False

        files = response.json()
        print(f"Space中包含 {len(files)} 个文件:")
        for entry in files:
            print(f" - {entry['path']} (大小: {entry.get('size', '未知')} 字节)")
        return True
    except Exception as e:
        # Best-effort diagnostic script: report the problem and signal failure.
        print(f"检查文件结构时出错: {e}")
        return False
|
| 23 |
+
|
| 24 |
+
def check_space_status(space_id="chaore/ttsedge", timeout=10):
    """Fetch and print the metadata of a Hugging Face Space.

    Args:
        space_id: ``owner/name`` of the Space to inspect.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        True when the metadata was fetched and printed, False on a non-200
        response or on any error.
    """
    print("\n检查Space的状态...")
    url = f"https://huggingface.co/api/spaces/{space_id}"

    try:
        # Without a timeout, requests can wait indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print(f"获取Space信息失败: {response.status_code}")
            return False

        space_info = response.json()
        # NOTE(review): the exact response schema is not visible here, so each
        # field falls back to the same placeholder check_space_files uses
        # instead of aborting the whole report on one missing key.
        labelled_fields = (
            ("Space名称", "name"),
            ("作者", "author"),
            ("状态", "status"),
            ("最后更新", "lastModified"),
            ("可见性", "private"),
        )
        for label, key in labelled_fields:
            print(f"{label}: {space_info.get(key, '未知')}")
        return True
    except Exception as e:
        # Best-effort diagnostic script: report the problem and signal failure.
        print(f"检查Space状态时出错: {e}")
        return False
|
| 45 |
+
|
| 46 |
+
if __name__ == "__main__":
    # Run both checks in order: file listing first, then the Space status.
    for run_check in (check_space_files, check_space_status):
        run_check()
|
gradio_app.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import edge_tts
|
| 3 |
+
import asyncio
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
def _as_percent(value):
    """Return *value* as a signed percentage string such as ``"+0%"``.

    Strings are passed through unchanged; numbers (as produced by the UI
    sliders) are rounded to an integer and formatted with an explicit sign.
    """
    if isinstance(value, str):
        return value
    return f"{int(round(value)):+d}%"


async def generate_audio(text, voice, rate, volume):
    """Synthesize *text* with Edge TTS and return the path of the MP3 file.

    Args:
        text: Text to synthesize.
        voice: Voice name passed to ``edge_tts.Communicate``.
        rate: Rate adjustment, either a string like ``"+10%"`` or a number of
            percent.
        volume: Volume adjustment, in the same forms as *rate*.

    Returns:
        Path of a newly created ``.mp3`` file holding the audio.
    """
    import tempfile  # local import so this block brings its own dependency

    # Elsewhere in this project edge_tts is given strings such as "+0%",
    # while the sliders deliver plain numbers, so normalize before the call.
    communicate = edge_tts.Communicate(
        text, voice, rate=_as_percent(rate), volume=_as_percent(volume)
    )

    # A unique file per call: a single fixed "output.mp3" would be overwritten
    # when two requests are processed at the same time. The file is kept
    # (delete=False) because the caller still has to read it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as handle:
        output_path = handle.name

    await communicate.save(output_path)
    return output_path
|
| 16 |
+
|
| 17 |
+
def text_to_speech(text, voice, rate, volume):
    """Synchronous click handler: synthesize *text* and return the audio path.

    The arguments are forwarded unchanged to ``generate_audio``.

    ``asyncio.run`` creates the event loop and always closes it, including
    when synthesis raises; the previous manual loop handling skipped
    ``loop.close()`` on errors.
    """
    return asyncio.run(generate_audio(text, voice, rate, volume))
|
| 26 |
+
|
| 27 |
+
def get_voices():
    """Return dropdown choices for every voice reported by Edge TTS.

    Returns:
        A list of ``(label, short_name)`` tuples sorted by label, where the
        label is ``"<friendly name> (<locale>)"``. No voices are filtered out.
    """
    # asyncio.run closes its event loop even when list_voices() raises.
    voices = asyncio.run(edge_tts.list_voices())

    voice_options = []
    for voice in voices:
        short_name = voice.get("ShortName", "")
        # Fall back to the short name when no friendly name is provided.
        friendly_name = voice.get("FriendlyName", short_name)
        locale = voice.get("Locale", "")
        voice_options.append((f"{friendly_name} ({locale})", short_name))

    return sorted(voice_options, key=lambda option: option[0])
|
| 46 |
+
|
| 47 |
+
# Load the voice choices once, when the module is imported.
voice_options = get_voices()

# Settings shared by the rate and volume sliders.
_PERCENT_SLIDER = dict(minimum=-50, maximum=50, step=5, value=0, info="-50% 到 +50%")

# Build the Gradio interface.
with gr.Blocks(title="Edge TTS 配音工具") as demo:
    gr.Markdown("# Edge TTS 配音工具")
    gr.Markdown("输入文本,选择语音,调节语速和音量,然后点击合成按钮生成音频。")

    with gr.Row():
        # Left column: all inputs plus the trigger button.
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="输入文本",
                placeholder="请输入要转换为语音的文本...",
                lines=5,
                max_lines=10,
            )

            voice_dropdown = gr.Dropdown(
                label="选择语音",
                choices=voice_options,
                value="zh-CN-YunxiNeural",
            )

            rate_slider = gr.Slider(label="语速", **_PERCENT_SLIDER)
            volume_slider = gr.Slider(label="音量", **_PERCENT_SLIDER)

            generate_button = gr.Button("合成语音", variant="primary")

        # Right column: the synthesized audio.
        with gr.Column(scale=1):
            audio_output = gr.Audio(label="合成音频", type="filepath")

    # Wire the button to the synthesis handler.
    generate_button.click(
        fn=text_to_speech,
        inputs=[text_input, voice_dropdown, rate_slider, volume_slider],
        outputs=audio_output,
    )

# Start the Gradio app when run as a script.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
edge-tts
|
| 2 |
+
fastapi
|
| 3 |
+
uvicorn
|
| 4 |
+
python-multipart
|
| 5 |
+
gradio
|
test_edge_tts.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import edge_tts
|
| 2 |
+
import asyncio
|
| 3 |
+
|
| 4 |
+
async def test_edge_tts():
    """Smoke-test the edge_tts library: list voices, then synthesize a phrase.

    The audio is written to ``direct_edge_tts.mp3`` in the working directory.

    Returns:
        True when audio data was received and saved, False when no audio
        arrived or any error occurred (the traceback is printed).
    """
    print("测试Edge TTS库...")
    try:
        # Step 1: list the available voices.
        voices = await edge_tts.list_voices()
        print(f"获取到 {len(voices)} 种语音")

        # Step 2: synthesize a short phrase.
        text = "Welcome to Edge TTS"
        voice = "en-US-JennyNeural"

        print(f"\n正在合成语音: {text}")
        print(f"使用语音: {voice}")

        communicate = edge_tts.Communicate(text, voice)
        # Collect chunks and join once instead of growing a bytes object with
        # ``+=``, which copies the whole buffer on every chunk.
        audio_chunks = []

        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_chunks.append(chunk["data"])
            elif chunk["type"] == "WordBoundary":
                print(f"\r正在处理: {chunk['text']}", end="")

        audio_data = b"".join(audio_chunks)
        if not audio_data:
            print("\n\n错误: 没有获取到音频数据")
            return False

        print(f"\n\n语音合成成功,音频大小: {len(audio_data)} 字节")
        with open("direct_edge_tts.mp3", "wb") as f:
            f.write(audio_data)
        print("音频已保存为 direct_edge_tts.mp3")
        return True

    except Exception as e:
        print(f"\n\n错误: {e}")
        import traceback
        traceback.print_exc()
        return False
|
| 42 |
+
|
| 43 |
+
if __name__ == "__main__":
    # Create the coroutine, then drive it to completion on a new event loop.
    smoke_test = test_edge_tts()
    asyncio.run(smoke_test)
|
test_output.mp3
ADDED
|
File without changes
|
test_tts.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import json
|
| 3 |
+
|
| 4 |
+
def test_voices():
    """Call ``GET /voices`` on the local server and print a few Chinese voices.

    Returns:
        True when the endpoint answered with status 200, False when it
        answered with another status or could not be reached.
    """
    print("测试获取语音列表...")
    try:
        # The timeout keeps the script from hanging on an unresponsive server.
        response = requests.get("http://localhost:7860/voices", timeout=30)
    except requests.RequestException as e:
        # Server not running or unreachable: report it like any other failure.
        print(f"获取语音列表失败: {e}")
        return False

    if response.status_code != 200:
        print(f"获取语音列表失败: {response.status_code}")
        return False

    voices = response.json()
    print(f"获取到 {len(voices)} 种语音")
    # Show at most the first five voices whose locale starts with "zh-".
    print("中文语音示例:")
    chinese_voices = [v for v in voices if v['locale'].startswith('zh-')]
    for voice in chinese_voices[:5]:
        print(f" - {voice['friendly_name']} ({voice['short_name']})")
    return True
|
| 20 |
+
|
| 21 |
+
def test_tts():
    """Call ``POST /tts`` on the local server and save the returned audio.

    The response body is streamed into ``test_output.mp3`` in the working
    directory.

    Returns:
        True when the audio was saved, False when the server answered with a
        non-200 status or could not be reached.
    """
    print("\n测试文本转语音...")
    text = "欢迎使用Edge TTS API服务,这是一个测试语音。"
    payload = {
        "text": text,
        "voice": "zh-CN-YunxiNeural",
        "rate": "+0%",
        "volume": "+0%"
    }
    try:
        # A streamed response keeps its connection open until it is closed,
        # so use it as a context manager; the timeout prevents hanging on an
        # unresponsive server.
        with requests.post(
            "http://localhost:7860/tts", json=payload, stream=True, timeout=60
        ) as response:
            if response.status_code != 200:
                print(f"语音合成失败: {response.status_code}")
                print(f"错误信息: {response.text}")
                return False

            with open("test_output.mp3", "wb") as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
    except requests.RequestException as e:
        # Server not running or connection dropped: report it as a failure.
        print(f"语音合成失败: {e}")
        return False

    print("语音合成成功,已保存为 test_output.mp3")
    return True
|
| 43 |
+
|
| 44 |
+
if __name__ == "__main__":
    # Exercise the voice listing first, then the synthesis endpoint.
    for scenario in (test_voices, test_tts):
        scenario()
|