xtts / app.py
don0726's picture
Update app.py
81d7095 verified
import logging
import os
import uuid

import soundfile as sf
import torch
from fastapi import BackgroundTasks, FastAPI, File, Form, UploadFile
from fastapi.responses import FileResponse, JSONResponse
from TTS.api import TTS
app = FastAPI(title="XTTS Voice Cloning API")

# Run on the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The model is instantiated here, at import time, so that every request
# handled by this process reuses the same loaded instance.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2").to(device)

# Directory where uploaded reference clips and generated audio are written.
OUTPUT_DIR = "outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)
def _remove_quietly(path):
    """Delete ``path`` if it exists; log (rather than raise) on failure."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # Nothing was written (or it was already cleaned up) - not an error.
        pass
    except OSError:
        logging.getLogger(__name__).warning(
            "Could not remove temporary file %s", path, exc_info=True
        )


@app.post("/clone-voice/")
async def clone_voice(
    text: str = Form(...),
    language: str = Form(...),
    audio: UploadFile = File(...)
):
    """Synthesize ``text`` using the uploaded clip as the speaker reference.

    Args:
        text: The text to speak. Must contain at least one non-whitespace
            character.
        language: Language identifier, forwarded unchanged to
            ``tts.tts_to_file``.
        audio: Uploaded reference recording; its bytes are written to a
            temporary file and passed as ``speaker_wav``.

    Returns:
        A ``FileResponse`` (``audio/wav``) with the generated speech on
        success. On failure, a JSON body ``{"error": "<message>"}`` with
        status 400 (empty text) or 500 (upload/synthesis error).
    """
    if not text.strip():
        return JSONResponse(
            status_code=400,
            content={"error": "text must not be empty"},
        )

    # Unique names so concurrent requests never share a file.
    input_path = f"{OUTPUT_DIR}/{uuid.uuid4()}_input.wav"
    output_path = f"{OUTPUT_DIR}/{uuid.uuid4()}_output.wav"

    try:
        # Save uploaded audio
        with open(input_path, "wb") as f:
            f.write(await audio.read())

        # Generate speech
        # NOTE(review): this is a synchronous call inside an ``async def``
        # handler, so it runs on the event-loop thread. Consider offloading
        # it once the model's thread-safety has been confirmed.
        tts.tts_to_file(
            text=text,
            speaker_wav=input_path,
            language=language,
            file_path=output_path,
        )
    except Exception as e:
        # Top-level request boundary: record the traceback, drop any partial
        # output, and report the failure with a real error status instead of
        # a 200 response that merely carries an "error" key.
        logging.getLogger(__name__).exception("Voice cloning failed")
        _remove_quietly(output_path)
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # The reference clip is only needed while synthesis runs.
        _remove_quietly(input_path)

    # Delete the generated file only after the response has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(_remove_quietly, output_path)
    return FileResponse(output_path, media_type="audio/wav", background=cleanup)