import os
import uuid
import torch
import soundfile as sf
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import FileResponse
from TTS.api import TTS
# Application object that the route decorators attach to.
app = FastAPI(title="XTTS Voice Cloning API")

# Build the XTTS v2 model a single time at import so that every request
# reuses the same instance instead of loading it again.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2").to(device)

# Working directory for audio files; created up front if it is missing.
OUTPUT_DIR = "outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)
@app.post("/clone-voice/")
async def clone_voice(
    text: str = Form(...),
    language: str = Form(...),
    audio: UploadFile = File(...),
):
    """Synthesize ``text`` using the uploaded recording as the speaker reference.

    The upload is written to a uniquely named file under ``OUTPUT_DIR`` and
    passed to the module-level ``tts`` model as ``speaker_wav``. The generated
    speech is written to a second uniquely named file in the same directory.

    Args:
        text: Text to speak (form field).
        language: Passed through unchanged to ``tts.tts_to_file``.
        audio: Uploaded reference recording (multipart file field).

    Returns:
        On success, a ``FileResponse`` with media type ``audio/wav`` serving
        the generated file. On any failure, a JSON body ``{"error": <message>}``
        sent with HTTP status 500, so clients can detect the failure from the
        status code instead of parsing a 200 response.
    """
    # Function-scope imports keep this block self-contained.
    import contextlib

    from fastapi.responses import JSONResponse

    input_path = os.path.join(OUTPUT_DIR, f"{uuid.uuid4()}_input.wav")
    output_path = os.path.join(OUTPUT_DIR, f"{uuid.uuid4()}_output.wav")

    try:
        # Persist the upload: the model is given a file path, not bytes.
        with open(input_path, "wb") as f:
            f.write(await audio.read())

        # NOTE(review): this is a synchronous call inside an async handler, so
        # the event loop is occupied until synthesis finishes. Moving it to a
        # worker thread would need confirmation that the model tolerates
        # concurrent calls.
        tts.tts_to_file(
            text=text,
            speaker_wav=input_path,
            language=language,
            file_path=output_path,
        )
    except Exception as e:
        # Do not leave a partially written result behind after a failure.
        with contextlib.suppress(OSError):
            os.remove(output_path)
        # Same body shape as before, but with an error status code.
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # The reference clip is only needed during synthesis. Removal is
        # best-effort because the file may not exist if open() itself failed.
        with contextlib.suppress(OSError):
            os.remove(input_path)

    # NOTE(review): the generated file is intentionally left in OUTPUT_DIR, as
    # in the original; nothing in this block ever removes it.
    return FileResponse(output_path, media_type="audio/wav")