edge-tts / app.py
1een's picture
path
85e9f05
import asyncio
import os
import uuid

import edge_tts
from fastapi import BackgroundTasks, FastAPI, HTTPException
from fastapi.responses import FileResponse, JSONResponse
from pydantic import BaseModel
# FastAPI application object; the @app.get / @app.post decorators in this file
# register their route handlers on it.
app = FastAPI()
# Short, client-facing voice aliases mapped to the full voice names that are
# handed to edge_tts.Communicate.
voiceMap = {
    "xiaoxiao": "zh-CN-XiaoxiaoNeural",
    "xiaoyi": "zh-CN-XiaoyiNeural",
    "yunjian": "zh-CN-YunjianNeural",
    "yunxi": "zh-CN-YunxiNeural",
    "yunxia": "zh-CN-YunxiaNeural",
    "yunyang": "zh-CN-YunyangNeural",
    "xiaobei": "zh-CN-liaoning-XiaobeiNeural",  # Liaoning dialect
    "xiaoni": "zh-CN-shaanxi-XiaoniNeural",  # Shaanxi dialect
    "hiugaai": "zh-HK-HiuGaaiNeural",  # Cantonese
    "hiumaan": "zh-HK-HiuMaanNeural",  # Cantonese
    "wanlung": "zh-HK-WanLungNeural",  # Cantonese
    "hsiaochen": "zh-TW-HsiaoChenNeural",  # Taiwanese Mandarin
    "hsiaoyu": "zh-TW-HsiaoYuNeural",  # Taiwanese Mandarin
    # Misspelled alias kept so existing clients that send it keep working.
    "hsioayu": "zh-TW-HsiaoYuNeural",
    "yunjhe": "zh-TW-YunJheNeural",  # Taiwanese Mandarin
}
def get_voice_id(voice: str):
    """Translate a short voice alias into its full voice name.

    Aliases that are not present in ``voiceMap`` resolve to the default
    ``zh-CN-XiaoxiaoNeural`` voice instead of raising.
    """
    try:
        return voiceMap[voice]
    except KeyError:
        return "zh-CN-XiaoxiaoNeural"
def normalize_percent(val: str) -> str:
    """Return *val* with an explicit leading sign when it is an unsigned percent.

    The result is passed to ``edge_tts.Communicate`` as ``rate`` / ``volume``,
    which expects a signed percentage such as ``"+0%"`` or ``"-20%"``. An
    unsigned value like ``"0%"`` or ``"10%"`` is treated as positive and gets a
    ``+`` prefix.

    Args:
        val: Percentage string supplied by the client.

    Returns:
        ``"+" + val`` when *val* is decimal digits followed by ``%``;
        otherwise *val* unchanged (already signed, or not a percentage at
        all, in which case validation is left to the downstream consumer).
    """
    # "%"[:-1] is the empty string, for which isdecimal() is False, so a bare
    # "%" is passed through untouched rather than turned into "+%".
    is_unsigned_percent = val.endswith("%") and val[:-1].isdecimal()
    return f"+{val}" if is_unsigned_percent else val
def preprocess_text(text: str) -> str:
    """Prefix *text* with ``",,"`` unless it already starts with punctuation.

    Args:
        text: Raw text to be synthesized.

    Returns:
        *text* unchanged when it is empty or begins with one of the recognised
        punctuation marks; otherwise ``",,"`` followed by *text*.
    """
    # Both ASCII and full-width (CJK) forms are recognised. The full-width
    # characters are written as escapes so they cannot be silently folded to
    # their ASCII look-alikes by an editor or copy/paste.
    leading_punctuation = (
        ",",
        ".",
        "!",
        "?",
        "\uff0c",  # full-width comma
        "\u3002",  # ideographic full stop
        "\uff01",  # full-width exclamation mark
        "\uff1f",  # full-width question mark
    )
    if not text or text.startswith(leading_punctuation):
        return text
    return ",," + text
class TTSRequest(BaseModel):
    # Request body accepted by the /synthesize endpoint.

    # Text to be spoken.
    text: str
    # Short voice alias, resolved through get_voice_id().
    voice: str = "xiaoxiao"
    # Speech rate as a percentage string; the default means normal speed.
    rate: str = "+0%"
    # Volume as a percentage string; the default means normal volume.
    volume: str = "+0%"
@app.get("/")
async def read_root():
    # Root endpoint: always answers with the same fixed greeting payload.
    greeting = {"Hello": "World"}
    return greeting
# @app.post("/synthesize")
# def synthesize(req: TTSRequest):
# output_path = f"/tmp/{uuid.uuid4().hex}.mp3"
# voice_id = get_voice_id(req.voice)
# rate = normalize_percent(req.rate)
# volume = normalize_percent(req.volume)
# try:
# async def run_tts():
# communicate = edge_tts.Communicate(
# req.text,
# voice_id,
# rate=rate,
# volume=volume
# )
# await communicate.save(output_path)
# asyncio.run(run_tts())
# return FileResponse(output_path, media_type="audio/mpeg")
# except Exception as e:
# return JSONResponse(status_code=400, content={"error": str(e)})
def _remove_quietly(path: str) -> None:
    """Delete the file at *path*, ignoring failures to remove it."""
    try:
        os.remove(path)
    except OSError:
        # Best-effort cleanup: the file may never have been created or may
        # already be gone, and a leftover temp file must not fail a request.
        pass


@app.post("/synthesize")
async def synthesize(req: TTSRequest):
    # Synthesize req.text to an MP3 with edge-tts and return it as audio/mpeg.
    #
    # The audio is written to a uniquely named temporary file which is removed
    # again once the response has been sent (or immediately if synthesis
    # fails), so repeated requests do not accumulate files on disk.
    # Any failure while building or running the synthesis is reported as a
    # 400 JSON body of the form {"error": "<message>"}.
    output_path = f"/tmp/{uuid.uuid4().hex}.mp3"
    voice_id = get_voice_id(req.voice)
    rate = normalize_percent(req.rate)
    volume = normalize_percent(req.volume)
    text = preprocess_text(req.text)
    print(text)
    try:
        # Already inside a coroutine, so await directly instead of asyncio.run().
        communicate = edge_tts.Communicate(
            text,
            voice_id,
            rate=rate,
            volume=volume,
        )
        await communicate.save(output_path)
    except Exception as e:
        # A failed save can leave an empty or partial file behind.
        _remove_quietly(output_path)
        return JSONResponse(status_code=400, content={"error": str(e)})

    print(output_path)
    # The response's background hook runs after the body has been sent, so the
    # file is still on disk while it is being served.
    cleanup = BackgroundTasks()
    cleanup.add_task(_remove_quietly, output_path)
    return FileResponse(output_path, media_type="audio/mpeg", background=cleanup)