# Listing metadata from the file viewer: file size 2,856 bytes, revision 8f38664.
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.8"
# dependencies = [
# "fastapi>=0.100.0",
# "uvicorn[standard]>=0.20.0",
# "pydantic>=2.0.0",
# "httpx>=0.25.0",
# "typer>=0.9.0",
# ]
# ///
"""
Chatterbox TTS Model Server - Mock Implementation
Compatible with HuggingFace InferenceClient text_to_speech API
"""
import argparse
import os
from pathlib import Path
from typing import Optional, Dict, Any
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
class TTSRequest(BaseModel):
    """Request body accepted by the text-to-speech endpoint.

    The module docstring states the server is meant to be compatible with the
    HuggingFace InferenceClient ``text_to_speech`` API; this model declares
    the two fields the endpoint reads from the JSON payload.
    """

    # Text to synthesize (required).
    inputs: str

    # Optional free-form options; None when the client omits the field.
    parameters: Optional[Dict[str, Any]] = None
app = FastAPI(title="Chatterbox TTS Server", version="1.0.0")

# Add CORS middleware.
# Any origin, method and header is accepted so that browser-based test clients
# can call this mock server. Credentials stay disabled: the FastAPI CORS
# documentation says wildcard origins/methods/headers must not be combined
# with allow_credentials=True, and no endpoint in this file reads cookies or
# authentication headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Path to the sample audio file returned for every TTS request.
# Stays None until main() assigns it from the --sample-audio argument.
SAMPLE_AUDIO_PATH: Optional[str] = None
@app.get("/")
async def health_check():
    """Answer GET / with a fixed status payload naming the mocked model."""
    payload = {
        "status": "ok",
        "model": "ResembleAI/chatterbox",
    }
    return payload
@app.post("/")
async def text_to_speech(request: TTSRequest):
    """
    Text-to-speech endpoint compatible with HuggingFace InferenceClient.

    Always returns the same sample audio file for testing; the request text
    and parameters are only logged, never synthesized.

    Args:
        request: Parsed request body holding the text (``inputs``) and the
            optional ``parameters`` mapping.

    Returns:
        A ``FileResponse`` streaming the configured sample file as
        ``audio/wav`` with the download name ``generated_audio.wav``.

    Raises:
        HTTPException: Status 500 when no sample path is configured or the
            path is not a regular file.
    """
    audio_path = SAMPLE_AUDIO_PATH

    # isfile rather than exists: a directory "exists" but cannot be served,
    # and would otherwise fail later inside FileResponse.
    if not audio_path or not os.path.isfile(audio_path):
        raise HTTPException(
            status_code=500,
            detail="Sample audio file not found. Please provide --sample-audio path.",
        )

    # Log at most 50 characters of the text, adding an ellipsis only when the
    # text was actually cut so short inputs are not shown as truncated.
    preview_limit = 50
    preview = request.inputs[:preview_limit]
    if len(request.inputs) > preview_limit:
        preview += "..."
    print(f"TTS Request - Text: '{preview}' Parameters: {request.parameters}")

    # Return the sample audio file
    return FileResponse(
        audio_path,
        media_type="audio/wav",
        filename="generated_audio.wav",
    )
def main():
    """Parse command-line options, record the sample audio path, and serve.

    Options:
        --port / -p     Port to run the server on (default 7860).
        --host          Host to bind to (default 0.0.0.0).
        --sample-audio  Required path to the audio file returned for every
                        TTS request.

    Raises:
        SystemExit: With exit status 1 (message on stderr) when the sample
            audio path is not an existing regular file.
    """
    global SAMPLE_AUDIO_PATH

    parser = argparse.ArgumentParser(description="Start Chatterbox TTS Server")
    parser.add_argument("--port", "-p", type=int, default=7860, help="Port to run server on")
    parser.add_argument("--host", default="0.0.0.0", help="Host to bind to")
    parser.add_argument("--sample-audio", required=True, help="Path to sample audio file to return")
    args = parser.parse_args()

    # Validate up front that the sample audio is a regular file; a directory
    # would pass a plain existence check but could never be served.
    if not os.path.isfile(args.sample_audio):
        # SystemExit with a string argument prints it to stderr and exits with
        # status 1, without relying on the site-provided exit() helper.
        raise SystemExit(f"Error: Sample audio file not found: {args.sample_audio}")

    SAMPLE_AUDIO_PATH = args.sample_audio

    # Startup banner. The mis-encoded leading characters present in the
    # original messages were dropped because they rendered as garbage.
    print(f"Starting Chatterbox TTS Server on {args.host}:{args.port}")
    print(f"Using sample audio: {args.sample_audio}")
    print(f"API endpoint: http://localhost:{args.port}/")

    uvicorn.run(
        app,
        host=args.host,
        port=args.port,
        log_level="info",
    )
# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    main()