File size: 3,219 Bytes
3c99414
 
 
 
 
6ab5e4f
3c99414
6ab5e4f
 
 
3c99414
 
6ab5e4f
3c99414
6ab5e4f
 
3c99414
 
 
 
 
 
6ab5e4f
3c99414
6ab5e4f
 
 
3c99414
 
 
 
 
 
 
 
 
 
6ab5e4f
 
3c99414
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ab5e4f
3c99414
 
 
 
 
 
6ab5e4f
3c99414
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# ─── ENV SETUP (VERY IMPORTANT - keep at top) ────────────────────────────────
# The variable is set before the TTS import further down, so it is already in
# the environment when that library is loaded.
# NOTE(review): presumably this pre-accepts the Coqui model terms-of-service
# prompt so startup never waits on interactive input — confirm against the
# TTS library.
import os
os.environ["COQUI_TOS_AGREED"] = "1"

# ─── IMPORTS ────────────────────────────────────────────────────────────────
import threading
import time
import uuid

from fastapi import FastAPI
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import FileResponse, JSONResponse
from pydantic import BaseModel
from TTS.api import TTS

# ─── APP INIT ───────────────────────────────────────────────────────────────
# Application instance; the route decorator below registers its handler on it.
app = FastAPI()

# ─── LOAD MODEL (runs once) ─────────────────────────────────────────────────
# Constructed at import time, so every request shares this single object.
# NOTE(review): gpu=False presumably forces CPU inference — confirm against
# the TTS library before changing it.
tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/xtts_v2",
    progress_bar=False,
    gpu=False
)

# ─── REQUEST MODEL ──────────────────────────────────────────────────────────
class TTSRequest(BaseModel):
    """Request body accepted by the ``/tts`` route."""

    # Text to synthesize; the route rejects empty or whitespace-only values.
    text: str

# ─── CLEANUP FUNCTION ───────────────────────────────────────────────────────
def cleanup_file(path: str, delay: float = 15) -> None:
    """Delete ``path`` after waiting ``delay`` seconds.

    This is best-effort cleanup meant to run off the request path: it never
    raises, so a failed deletion cannot take down the thread that runs it.

    Args:
        path: File to remove.
        delay: Seconds to wait before deleting. Defaults to 15, the wait this
            function has always used; zero or a negative value deletes
            immediately.
    """
    if delay > 0:
        time.sleep(delay)  # wait before deleting

    try:
        # Remove directly instead of checking existence first: the file can
        # disappear between the check and the removal.
        os.remove(path)
    except FileNotFoundError:
        # Already gone; there is nothing left to clean up.
        pass
    except Exception as e:
        print(f"Cleanup error: {e}")

# ─── SYNTHESIS HELPERS ──────────────────────────────────────────────────────
# Synthesis runs in worker threads so the event loop stays responsive. The
# lock keeps it to one synthesis at a time on the shared model object.
# NOTE(review): whether the model tolerates concurrent calls is not visible
# here, so calls are kept serialized — confirm before removing the lock.
_synthesis_lock = threading.Lock()


def _synthesize(text: str, speaker_path: str, file_path: str) -> None:
    """Run one blocking synthesis call, writing the audio to ``file_path``."""
    with _synthesis_lock:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_path,
            file_path=file_path
        )


def _discard_partial_output(path: str) -> None:
    """Best-effort removal of an output file left behind by a failed request."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # Nothing was written before the failure.
        pass
    except OSError as e:
        print(f"Cleanup error: {e}")


# ─── ROUTE ──────────────────────────────────────────────────────────────────
@app.post("/tts")
async def generate_audio(req: TTSRequest):
    """Synthesize ``req.text`` with the ``sage.wav`` speaker and return a WAV.

    Args:
        req: Request body carrying the text to speak.

    Returns:
        A ``FileResponse`` with media type ``audio/wav`` on success.
        A 400 ``JSONResponse`` when the text is empty or only whitespace.
        A 500 ``JSONResponse`` when the speaker file is missing or when
        synthesis / response construction fails.
    """
    # Validate input
    if not req.text or not req.text.strip():
        return JSONResponse(
            status_code=400,
            content={"error": "Text input is empty"}
        )

    # Check speaker file exists
    speaker_path = "sage.wav"
    if not os.path.exists(speaker_path):
        return JSONResponse(
            status_code=500,
            content={"error": "Speaker file (sage.wav) missing on server"}
        )

    # Generate unique output file
    file_name = f"output_{uuid.uuid4().hex}.wav"
    cleanup_scheduled = False

    try:
        # Generate speech off the event loop: the call is blocking, and
        # running it inline in this coroutine would stall every other request
        # until it finished.
        await run_in_threadpool(_synthesize, req.text, speaker_path, file_name)

        # Schedule cleanup
        threading.Thread(target=cleanup_file, args=(file_name,)).start()
        cleanup_scheduled = True

        # Return audio file
        return FileResponse(
            file_name,
            media_type="audio/wav",
            filename="speech.wav"
        )

    except Exception as e:
        print(f"TTS ERROR: {e}")
        # If no cleanup thread was started, nothing else will ever remove a
        # partially written output file, so remove it here.
        if not cleanup_scheduled:
            _discard_partial_output(file_name)
        return JSONResponse(
            status_code=500,
            content={"error": str(e)}
        )