# Hugging Face Space (status: Paused) - file size: 3,661 bytes
import os
import torch
import torchaudio as ta
from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.responses import FileResponse
from pydantic import BaseModel
from chatterbox.mtl_tts import ChatterboxMultilingualTTS
import functools
import uvicorn
import asyncio
# Patch torch.load for CPU if necessary (as in app.py)
# torch.load = functools.partial(torch.load, map_location='cpu')
# FastAPI application that the route decorators below register on.
app = FastAPI()

# 1. Determine device dynamically
_cuda_available = torch.cuda.is_available()
device_map = "cuda" if _cuda_available else "cpu"

# Create a lock to ensure only one generation happens at a time (important for GPU)
model_lock = asyncio.Lock()

print(f"CUDA Available: {_cuda_available}")
if _cuda_available:
    # Only query the device name when CUDA is present: torch.cuda.current_device()
    # initialises the CUDA runtime and raises on a CPU-only host, which would
    # abort startup before the CPU fallback chosen above is ever used.
    _device_name = torch.cuda.get_device_name(torch.cuda.current_device())
    print(f"Using device: {device_map} with name: {_device_name}")
else:
    print(f"Using device: {device_map}")

print("Loading TTS model...")
# Using Multilingual model as requested
tts_model = ChatterboxMultilingualTTS.from_pretrained(device=device_map)

# Optimize for T4 GPU using half-precision (FP16)
# We use autocast during inference for the best balance of speed and stability
if device_map == "cuda":
    print("GPU optimization: FP16 Autocast enabled.")
print("Model loaded.")
class TTSRequest(BaseModel):
    """Request body shared by the /tts, /stream and /test endpoints."""

    # Text handed to the TTS model for synthesis.
    message: str
    # Passed to the model as its ``language_id`` argument.
    language: str
    # The three identifiers below are combined to name the output .wav file.
    channelID: str
    username: str
    messageid: str
def cleanup_file(filepath: str) -> None:
    """Delete *filepath* after it has been sent; never raises.

    Best-effort cleanup intended to run as a background task: a file that is
    already gone is ignored silently, and any other failure is printed rather
    than propagated.

    Args:
        filepath: Path of the temporary file to remove.
    """
    try:
        # Remove directly instead of checking os.path.exists() first; the
        # check-then-remove sequence can race with another deleter.
        os.remove(filepath)
    except FileNotFoundError:
        # Nothing to clean up (same silent outcome as the old exists() check).
        return
    except Exception as e:
        print(f"Error deleting file {filepath}: {e}")
    else:
        print(f"Deleted temporary file: {filepath}")
def _safe_filename_part(value: str) -> str:
    """Return *value* with every character except letters, digits, ``-``, ``_`` and ``.`` replaced by ``_``."""
    cleaned = "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in value)
    return cleaned or "_"


def generate_audio(req: TTSRequest) -> str:
    """Generate speech for *req*, save it as a .wav file and return its path.

    The file is written to ``outputs/<channelID>-<username>-<messageid>.wav``.
    The three identifiers come from the request body, so each one is sanitised
    first: path separators and other special characters become ``_``, which
    keeps the file inside ``outputs/`` whatever the client sends.

    Args:
        req: The TTS request; ``message`` and ``language`` are passed to the
            model, the remaining fields name the output file.

    Returns:
        The path of the written .wav file.

    Raises:
        HTTPException: With status 500 if generation or saving fails.
    """
    # Function-scope import so this block does not depend on the file's import list.
    from contextlib import nullcontext

    os.makedirs("outputs", exist_ok=True)
    stem = "-".join(
        _safe_filename_part(part)
        for part in (req.channelID, req.username, req.messageid)
    )
    filename = os.path.join("outputs", f"{stem}.wav")
    try:
        # Use autocast to automatically handle float16/float32 mixing
        # This prevents the "mat1 and mat2 must have the same dtype" error
        if device_map == "cuda":
            inference_ctx = torch.amp.autocast(device_type='cuda', dtype=torch.float16)
        else:
            inference_ctx = nullcontext()
        with inference_ctx:
            audio_tensor = tts_model.generate(req.message, language_id=req.language)
        ta.save(filename, audio_tensor, tts_model.sr)
        return filename
    except Exception as e:
        # Chain the cause so the original traceback is kept in the server logs.
        raise HTTPException(status_code=500, detail=f"TTS Generation failed: {str(e)}") from e
@app.post("/tts")
async def tts_endpoint(req: TTSRequest, background_tasks: BackgroundTasks):
    """Synthesize speech for *req* and return it as a downloadable .wav file.

    Generation runs in a worker thread while holding ``model_lock``, so only
    one generation is in progress at a time. The generated file is scheduled
    for deletion through ``background_tasks``.

    Args:
        req: The TTS request body.
        background_tasks: FastAPI background-task collector used for cleanup.

    Returns:
        A ``FileResponse`` carrying the generated audio.
    """
    async with model_lock:
        filename = await asyncio.to_thread(generate_audio, req)
    background_tasks.add_task(cleanup_file, filename)
    # Advertise only the base name to the client; the full path would put the
    # server-side "outputs/" directory into the download filename.
    return FileResponse(
        path=filename,
        filename=os.path.basename(filename),
        media_type='audio/wav',
    )
@app.post("/stream")
async def stream_endpoint(req: TTSRequest, background_tasks: BackgroundTasks):
    """Synthesize speech for *req* and return the .wav file without a download name.

    Generation is serialized through ``model_lock`` and run in a worker
    thread; the file is scheduled for deletion via ``background_tasks``.
    """
    async with model_lock:
        wav_path = await asyncio.to_thread(generate_audio, req)

    background_tasks.add_task(cleanup_file, wav_path)

    # FileResponse handles streaming efficiently for large files
    response = FileResponse(path=wav_path, media_type='audio/wav')
    return response
@app.post("/test")
async def test_endpoint(req: TTSRequest):
    """Run a generation for *req* and report the path of the file it wrote.

    Unlike the other endpoints, no cleanup is scheduled: the generated file
    is left on disk and only a status payload is returned.
    """
    async with model_lock:
        wav_path = await asyncio.to_thread(generate_audio, req)

    # For /test, we don't delete the file and just return "ok"
    payload = {"status": "ok", "filename": wav_path}
    return payload
if __name__ == "__main__":
    # Listen on all interfaces; the PORT environment variable overrides the
    # default port 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=int(os.environ.get("PORT", 7860)),
    )
|