Upload tts-server.py
Browse files- tts-server.py +5 -23
tts-server.py
CHANGED
|
@@ -10,9 +10,8 @@ from typing import Optional, List, Dict, Any, AsyncGenerator
|
|
| 10 |
|
| 11 |
import aiohttp
|
| 12 |
import websockets
|
| 13 |
-
from fastapi import FastAPI, HTTPException, Header, Request, BackgroundTasks, Depends
|
| 14 |
from fastapi.responses import StreamingResponse, JSONResponse
|
| 15 |
-
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
| 16 |
from pydantic import BaseModel, Field
|
| 17 |
import uvicorn
|
| 18 |
from contextlib import asynccontextmanager
|
|
@@ -33,7 +32,6 @@ PORT = 1547
|
|
| 33 |
HOST = "0.0.0.0"
|
| 34 |
MODELS_FILE = "models.json"
|
| 35 |
COOKIE_DIR = "cookie" # Directory to store cookie txt files
|
| 36 |
-
AUTH_PASSWORD = os.getenv("PASSWORD", "sk-wei123")
|
| 37 |
|
| 38 |
# Initialize FastAPI
|
| 39 |
@asynccontextmanager
|
|
@@ -52,12 +50,6 @@ async def lifespan(app: FastAPI):
|
|
| 52 |
yield
|
| 53 |
|
| 54 |
app = FastAPI(title="Doubao TTS OpenAI API Server", lifespan=lifespan)
|
| 55 |
-
security = HTTPBearer()
|
| 56 |
-
|
| 57 |
-
async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
|
| 58 |
-
if credentials.credentials != AUTH_PASSWORD:
|
| 59 |
-
raise HTTPException(status_code=401, detail="Invalid authentication token")
|
| 60 |
-
return credentials.credentials
|
| 61 |
|
| 62 |
# ==========================================
|
| 63 |
# Cookie Manager
|
|
@@ -244,7 +236,7 @@ class DoubaoTTS:
|
|
| 244 |
except Exception as e:
|
| 245 |
logger.error(f"Failed to save models.json: {e}")
|
| 246 |
|
| 247 |
-
async def stream_audio(self, text: str, voice: str,
|
| 248 |
"""Connect to WebSocket and yield audio chunks with retry logic."""
|
| 249 |
|
| 250 |
# Map OpenAI speed (0.25 - 4.0) to Doubao rate (-100 to 100)
|
|
@@ -262,7 +254,7 @@ class DoubaoTTS:
|
|
| 262 |
return
|
| 263 |
|
| 264 |
params = self._get_common_params()
|
| 265 |
-
ws_url = f"{self.ws_url}?format=
|
| 266 |
|
| 267 |
headers = {
|
| 268 |
"Cookie": cookie,
|
|
@@ -271,9 +263,6 @@ class DoubaoTTS:
|
|
| 271 |
}
|
| 272 |
|
| 273 |
try:
|
| 274 |
-
# Try to use extra_headers, if it fails, it might be a version issue or specific environment quirk
|
| 275 |
-
# But standard websockets library uses extra_headers.
|
| 276 |
-
# Let's try to be safe and use the standard way.
|
| 277 |
async with websockets.connect(ws_url, extra_headers=headers) as ws:
|
| 278 |
# Send Text Event
|
| 279 |
msg = {
|
|
@@ -348,7 +337,7 @@ async def check_speech_endpoint():
|
|
| 348 |
return {"status": "ok", "message": "Speech endpoint is ready"}
|
| 349 |
|
| 350 |
@app.post("/v1/audio/speech")
|
| 351 |
-
async def create_speech(req: OpenAIRequest, token: str = Depends(verify_token)):
|
| 352 |
"""
|
| 353 |
OpenAI-compatible speech generation endpoint.
|
| 354 |
"""
|
|
@@ -359,15 +348,8 @@ async def create_speech(req: OpenAIRequest, token: str = Depends(verify_token)):
|
|
| 359 |
if req.response_format == "mp3":
|
| 360 |
media_type = "audio/mpeg"
|
| 361 |
|
| 362 |
-
# Determine format to request from Doubao
|
| 363 |
-
target_format = "aac"
|
| 364 |
-
if req.response_format == "mp3":
|
| 365 |
-
target_format = "mp3"
|
| 366 |
-
elif req.response_format == "pcm":
|
| 367 |
-
target_format = "pcm"
|
| 368 |
-
|
| 369 |
return StreamingResponse(
|
| 370 |
-
engine.stream_audio(req.input, req.voice,
|
| 371 |
media_type=media_type
|
| 372 |
)
|
| 373 |
|
|
|
|
| 10 |
|
| 11 |
import aiohttp
|
| 12 |
import websockets
|
| 13 |
+
from fastapi import FastAPI, HTTPException, Header, Request, BackgroundTasks
|
| 14 |
from fastapi.responses import StreamingResponse, JSONResponse
|
|
|
|
| 15 |
from pydantic import BaseModel, Field
|
| 16 |
import uvicorn
|
| 17 |
from contextlib import asynccontextmanager
|
|
|
|
| 32 |
HOST = "0.0.0.0"
|
| 33 |
MODELS_FILE = "models.json"
|
| 34 |
COOKIE_DIR = "cookie" # Directory to store cookie txt files
|
|
|
|
| 35 |
|
| 36 |
# Initialize FastAPI
|
| 37 |
@asynccontextmanager
|
|
|
|
| 50 |
yield
|
| 51 |
|
| 52 |
app = FastAPI(title="Doubao TTS OpenAI API Server", lifespan=lifespan)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
# ==========================================
|
| 55 |
# Cookie Manager
|
|
|
|
| 236 |
except Exception as e:
|
| 237 |
logger.error(f"Failed to save models.json: {e}")
|
| 238 |
|
| 239 |
+
async def stream_audio(self, text: str, voice: str, speed: float = 1.0, pitch: float = 1.0) -> AsyncGenerator[bytes, None]:
|
| 240 |
"""Connect to WebSocket and yield audio chunks with retry logic."""
|
| 241 |
|
| 242 |
# Map OpenAI speed (0.25 - 4.0) to Doubao rate (-100 to 100)
|
|
|
|
| 254 |
return
|
| 255 |
|
| 256 |
params = self._get_common_params()
|
| 257 |
+
ws_url = f"{self.ws_url}?format=aac&speaker={voice}&speech_rate={doubao_rate}&pitch={doubao_pitch}{params}"
|
| 258 |
|
| 259 |
headers = {
|
| 260 |
"Cookie": cookie,
|
|
|
|
| 263 |
}
|
| 264 |
|
| 265 |
try:
|
|
|
|
|
|
|
|
|
|
| 266 |
async with websockets.connect(ws_url, extra_headers=headers) as ws:
|
| 267 |
# Send Text Event
|
| 268 |
msg = {
|
|
|
|
| 337 |
return {"status": "ok", "message": "Speech endpoint is ready"}
|
| 338 |
|
| 339 |
@app.post("/v1/audio/speech")
|
| 340 |
+
async def create_speech(req: OpenAIRequest):
|
| 341 |
"""
|
| 342 |
OpenAI-compatible speech generation endpoint.
|
| 343 |
"""
|
|
|
|
| 348 |
if req.response_format == "mp3":
|
| 349 |
media_type = "audio/mpeg"
|
| 350 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 351 |
return StreamingResponse(
|
| 352 |
+
engine.stream_audio(req.input, req.voice, req.speed, req.pitch),
|
| 353 |
media_type=media_type
|
| 354 |
)
|
| 355 |
|