File size: 3,661 Bytes
5c31db9
 
 
 
 
 
 
 
 
15a3086
5c31db9
 
 
 
 
 
 
 
 
15a3086
 
 
5c31db9
 
 
 
15a3086
5c31db9
15a3086
 
e00ded0
15a3086
e00ded0
15a3086
5c31db9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e8ceee6
 
15a3086
5c31db9
e00ded0
 
 
 
 
 
 
 
5c31db9
 
 
 
 
 
 
15a3086
 
5c31db9
 
 
 
 
15a3086
 
5c31db9
 
 
 
 
 
15a3086
 
5c31db9
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import os
import torch
import torchaudio as ta
from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.responses import FileResponse
from pydantic import BaseModel
from chatterbox.mtl_tts import ChatterboxMultilingualTTS
import functools
import uvicorn
import asyncio

# Patch torch.load for CPU if necessary (as in app.py)
# torch.load = functools.partial(torch.load, map_location='cpu')

app = FastAPI()

# 1. Determine device dynamically: use the GPU when one is visible to torch,
# otherwise fall back to the CPU.
device_map = "cuda" if torch.cuda.is_available() else "cpu"

# Create a lock to ensure only one generation happens at a time (important for GPU)
model_lock = asyncio.Lock()

print(f"CUDA Available: {torch.cuda.is_available()}")
if device_map == "cuda":
    # The device-name query is only valid when CUDA is present; calling it on
    # a CPU-only host raises and would abort startup before the model loads.
    print(f"Using device: {device_map} with name: {torch.cuda.get_device_name(torch.cuda.current_device())}")
else:
    print(f"Using device: {device_map}")

print("Loading TTS model...")
# Using Multilingual model as requested
tts_model = ChatterboxMultilingualTTS.from_pretrained(device=device_map)

# Optimize for T4 GPU using half-precision (FP16)
# We use autocast during inference for the best balance of speed and stability
if device_map == "cuda":
    print("GPU optimization: FP16 Autocast enabled.")

print("Model loaded.")

class TTSRequest(BaseModel):
    """Request body accepted by the /tts, /stream and /test endpoints."""

    # Text handed to the model's generate() call.
    message: str
    # Forwarded to generate() as its language_id argument.
    language: str
    # The three identifiers below are joined with "-" to form the name of the
    # .wav file written under the outputs/ directory.
    channelID: str
    username: str
    messageid: str

def cleanup_file(filepath: str):
    """Remove the generated file at *filepath* once it is no longer needed.

    This is best-effort: a path that does not exist is skipped silently, and
    any error raised while deleting is printed instead of propagated.
    """
    try:
        if not os.path.exists(filepath):
            return
        os.remove(filepath)
        print(f"Deleted temporary file: {filepath}")
    except Exception as err:
        print(f"Error deleting file {filepath}: {err}")

# Characters that must never reach the output path: both path-separator
# styles (so a request field cannot add directory components or climb out of
# the output directory) and NUL (rejected by the OS file APIs).
_UNSAFE_FILENAME_CHARS = str.maketrans({"/": "_", "\\": "_", "\0": "_"})


def _safe_filename_part(value: str) -> str:
    """Return *value* with path separators and NUL characters replaced by "_"."""
    return value.translate(_UNSAFE_FILENAME_CHARS)


def generate_audio(req: TTSRequest) -> str:
    """Synthesize speech for *req*, save it as a WAV file and return its path.

    The file is written to ``outputs/<channelID>-<username>-<messageid>.wav``.
    The three identifiers come straight from the request body, so path
    separators in them are neutralised first; the result is always a single
    file name inside ``outputs/``.

    Args:
        req: The request carrying the text, the language id and the
            identifiers used to name the output file.

    Returns:
        The relative path of the WAV file that was written.

    Raises:
        HTTPException: With status 500 when generation or saving fails; the
            original exception is attached as the cause.
    """
    from contextlib import nullcontext

    os.makedirs("outputs", exist_ok=True)
    stem = "-".join(
        _safe_filename_part(part)
        for part in (req.channelID, req.username, req.messageid)
    )
    filename = os.path.join("outputs", f"{stem}.wav")

    try:
        # On CUDA, autocast handles float16/float32 mixing automatically,
        # which prevents the "mat1 and mat2 must have the same dtype" error.
        # On CPU the model runs as-is, so a no-op context is used instead.
        if device_map == "cuda":
            precision_ctx = torch.amp.autocast(device_type='cuda', dtype=torch.float16)
        else:
            precision_ctx = nullcontext()

        with precision_ctx:
            audio_tensor = tts_model.generate(req.message, language_id=req.language)

        ta.save(filename, audio_tensor, tts_model.sr)
        return filename
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"TTS Generation failed: {str(e)}") from e

@app.post("/tts")
async def tts_endpoint(req: TTSRequest, background_tasks: BackgroundTasks):
    """Synthesize speech for *req* and return it as a downloadable WAV file.

    Generation runs in a worker thread while holding the model lock, so only
    one synthesis is in progress at a time. The generated file is deleted by
    a background task after the response has been handled.
    """
    async with model_lock:
        filename = await asyncio.to_thread(generate_audio, req)
    background_tasks.add_task(cleanup_file, filename)
    # Advertise only the base name to the client; the on-disk path includes
    # the server-side output directory, which does not belong in the
    # Content-Disposition header.
    return FileResponse(
        path=filename,
        filename=os.path.basename(filename),
        media_type='audio/wav',
    )

@app.post("/stream")
async def stream_endpoint(req: TTSRequest, background_tasks: BackgroundTasks):
    """Synthesize speech for *req* and send the WAV back without a download name.

    Generation runs in a worker thread under the model lock; the file is
    removed by a background task afterwards.
    """
    async with model_lock:
        wav_path = await asyncio.to_thread(generate_audio, req)
    # FileResponse handles streaming efficiently for large files
    response = FileResponse(path=wav_path, media_type='audio/wav')
    background_tasks.add_task(cleanup_file, wav_path)
    return response

@app.post("/test")
async def test_endpoint(req: TTSRequest):
    """Generate audio for *req* and report the path it was written to.

    No cleanup is scheduled here, so the generated file stays on disk; the
    response is a small JSON status object rather than the audio itself.
    """
    async with model_lock:
        saved_path = await asyncio.to_thread(generate_audio, req)
    return dict(status="ok", filename=saved_path)

if __name__ == "__main__":
    # Serve on all interfaces; the PORT environment variable overrides the
    # default port of 7860.
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))