Spaces:

1een
/

edge-tts

Running

App Files Files Community

1een committed on Jul 15, 2025

Commit

8909311

1 Parent(s): 9317d4d

edge

Browse files

Files changed (4) hide show

Dockerfile +2 -15
README.md +19 -30
app.py +10 -20
requirements.txt +1 -3

Dockerfile CHANGED Viewed

@@ -1,23 +1,10 @@
 FROM python:3.10-slim
-ENV NUMBA_DISABLE_CACHE=1
-ENV MPLCONFIGDIR=/tmp
-ENV XDG_CACHE_HOME=/tmp
-ENV COQUI_TTS_HOME=/tmp/tts_cache
-ENV HF_HOME=/tmp/huggingface
-ENV TRANSFORMERS_CACHE=/tmp/huggingface/transformers
-ENV TORCH_HOME=/tmp/torch
-ENV SENTENCEPIECE_CACHE=/tmp/sentencepiece
-ENV PYTHON_EGG_CACHE=/tmp/python-eggs
-ENV HOME=/tmp
-ENV USER=root
 WORKDIR /app
 COPY . .
-RUN apt-get update && apt-get install -y espeak-ng libsndfile1 ffmpeg && \
-    pip install --upgrade pip && \
     pip install --no-cache-dir -r requirements.txt
 EXPOSE 7860
-CMD env NUMBA_DISABLE_CACHE=1 uvicorn app:app --host 0.0.0.0 --port 7860

 FROM python:3.10-slim
 WORKDIR /app
 # Copy only the dependency manifest first so the pip layer below stays cached
 # until requirements.txt itself changes (source edits no longer reinstall deps).
 COPY requirements.txt .
 RUN pip install --no-cache-dir --upgrade pip && \
     pip install --no-cache-dir -r requirements.txt
 # Application source goes in last; it changes far more often than dependencies.
 COPY . .
 EXPOSE 7860
 # Exec form runs uvicorn directly as the container's main process instead of
 # under `/bin/sh -c`, so stop signals reach the server itself.
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -9,62 +9,51 @@ pinned: false
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
-# 中文语音合成 API（TTS）
-本项目基于 [Coqui TTS](https://github.com/coqui-ai/TTS) 和 FastAPI，提供中文文本转语音（Text-to-Speech, TTS）API 服务，支持 Docker 一键部署。
 ## 功能说明
-- 支持中文文本转语音，使用 baker/tacotron2-DDC-GST 模型。
-- 提供 RESTful API 接口，返回语音 wav 文件。
 - 适合语音播报、语音助手等场景。
 ## 依赖环境
-- Python 3.10
-- TTS==0.22.0
 - fastapi
 - uvicorn
-- numba>=0.58.1
-- torch==2.5.1
-- 需系统依赖：espeak-ng、libsndfile1、ffmpeg
 ## 快速开始
-### 1. Docker 部署
 ```bash
-docker build -t tts-api .
-docker run -d -p 7860:7860 tts-api
 ```
-### 2. 本地运行
-1. 安装系统依赖：
-   ```bash
-   sudo apt-get update && sudo apt-get install -y espeak-ng libsndfile1 ffmpeg
-   ```
-2. 安装 Python 依赖：
-   ```bash
-   pip install -r requirements.txt
-   ```
-3. 启动服务：
-   ```bash
-   uvicorn app:app --host 0.0.0.0 --port 7860
-   ```
 ## API 用法
 ### POST /synthesize
 - **请求体**：JSON
   ```json
-  { "text": "你好，世界！" }
   ```
-- **返回**：音频文件（audio/wav）
 ### 示例
 ```bash
 curl -X POST "http://localhost:7860/synthesize" \
      -H "Content-Type: application/json" \
-     -d '{"text": "你好，世界！"}' --output output.wav
 ```
 ## 参考
-- [Coqui TTS 官方文档](https://tts.readthedocs.io/zh/latest/)
-- [HuggingFace 模型库](https://huggingface.co/coqui/)

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# 基于 Edge-TTS 的中文语音合成 API
+本项目基于 [edge-tts](https://github.com/rany2/edge-tts) 和 FastAPI，提供中文及多语言文本转语音（Text-to-Speech, TTS）API 服务，支持自定义发音人，输出 mp3 文件。
 ## 功能说明
+- 支持多语言文本转语音，默认中文女声（zh-CN-XiaoxiaoNeural）。
+- 可通过参数自定义 Microsoft Edge TTS 支持的 voice。
+- 提供 RESTful API 接口，返回 mp3 音频文件。
 - 适合语音播报、语音助手等场景。
 ## 依赖环境
+- Python 3.8+
+- edge-tts
 - fastapi
 - uvicorn
 ## 快速开始
+### 1. 安装依赖
 ```bash
+pip install -r requirements.txt
 ```
+### 2. 启动服务
+```bash
+uvicorn app:app --host 0.0.0.0 --port 7860
+```
 ## API 用法
 ### POST /synthesize
 - **请求体**：JSON
   ```json
+  { "text": "你好，世界！", "voice": "zh-CN-XiaoxiaoNeural" }
   ```
+  - `voice` 可选，默认 zh-CN-XiaoxiaoNeural。可用 voice 参考 edge-tts 官方文档。
+- **返回**：音频文件（audio/mpeg，mp3 格式）
 ### 示例
 ```bash
 curl -X POST "http://localhost:7860/synthesize" \
      -H "Content-Type: application/json" \
+     -d '{"text": "你好，世界！", "voice": "zh-CN-XiaoxiaoNeural"}' --output output.mp3
 ```
 ## 参考
+- [edge-tts 官方文档](https://github.com/rany2/edge-tts)
+- [可用 voice 列表](https://github.com/rany2/edge-tts#voices)

app.py CHANGED Viewed

@@ -1,32 +1,22 @@
 import os
-os.environ["NUMBA_DISABLE_CACHE"] = "1"
-os.environ["NUMBA_CACHE_DIR"] = "/tmp/numba_cache"
-os.environ["MPLCONFIGDIR"] = "/tmp"
-os.environ["XDG_CACHE_HOME"] = "/tmp"
-os.environ["COQUI_TTS_HOME"] = "/tmp/tts_cache"
-os.environ["HF_HOME"] = "/tmp/huggingface"
-os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface/transformers"
-os.environ["TORCH_HOME"] = "/tmp/torch"
-os.environ["SENTENCEPIECE_CACHE"] = "/tmp/sentencepiece"
-os.environ["PYTHON_EGG_CACHE"] = "/tmp/python-eggs"
-from TTS.api import TTS
 from fastapi import FastAPI
 from pydantic import BaseModel
 from fastapi.responses import FileResponse
-import uvicorn
-import uuid
 app = FastAPI()
-# 加载中文 Tacotron2 模型
-tts = TTS(model_name="tts_models/zh-CN/baker/tacotron2-DDC-GST", progress_bar=False)
 class TTSRequest(BaseModel):
     text: str
 @app.post("/synthesize")
 def synthesize(req: TTSRequest):
-    output_path = f"/tmp/{uuid.uuid4().hex}.wav"
-    tts.tts_to_file(text=req.text, file_path=output_path)
-    return FileResponse(output_path, media_type="audio/wav")

 import os
+import uuid
+import asyncio
 from fastapi import FastAPI
 from pydantic import BaseModel
 from fastapi.responses import FileResponse
+import edge_tts
 app = FastAPI()
 # JSON request body accepted by POST /synthesize.
 class TTSRequest(BaseModel):
     # Text to be converted to speech (required).
     text: str
     # Voice name handed to edge_tts.Communicate; optional in the request body.
+    voice: str = "zh-CN-XiaoxiaoNeural"  # Defaults to a Chinese female voice
 @app.post("/synthesize")
 async def synthesize(req: TTSRequest):
     """Synthesize ``req.text`` with edge-tts and return the audio as an MP3 file.

     Args:
         req: Request body carrying the text to speak and the voice name.

     Returns:
         A ``FileResponse`` (``audio/mpeg``) streaming the generated MP3. The
         temporary file is removed after the response has been sent.

     Raises:
         HTTPException: 400 when ``text`` is empty or whitespace only;
             502 when edge-tts fails to produce audio.
     """
     # Imported locally so this handler does not rely on changes to the
     # module-level import block.
     from fastapi import BackgroundTasks, HTTPException

     if not req.text.strip():
         raise HTTPException(status_code=400, detail="text must not be empty")

     output_path = f"/tmp/{uuid.uuid4().hex}.mp3"
     try:
         # The endpoint is a coroutine, so the edge-tts coroutine is awaited on
         # the server's event loop rather than spinning up a new loop per request.
         communicate = edge_tts.Communicate(req.text, req.voice)
         await communicate.save(output_path)
     except Exception as err:
         # Do not leave a partially written file behind when synthesis fails.
         if os.path.exists(output_path):
             os.remove(output_path)
         raise HTTPException(
             status_code=502, detail="speech synthesis failed"
         ) from err

     # Delete the temporary file once the response body has been sent; without
     # this every request would leave an MP3 behind in /tmp.
     cleanup = BackgroundTasks()
     cleanup.add_task(os.remove, output_path)
     return FileResponse(output_path, media_type="audio/mpeg", background=cleanup)

requirements.txt CHANGED Viewed

@@ -1,5 +1,3 @@
-TTS==0.22.0
 fastapi
 uvicorn
-numba>=0.58.1
-torch==2.5.1

+edge-tts
 fastapi
 uvicorn