import functools
import os
import tempfile

import numpy as np
import soundfile as sf
import torch
from fastapi import BackgroundTasks, FastAPI, File, Form, UploadFile
from fastapi.responses import FileResponse
from TTS.api import TTS


# Mark the Coqui terms of service as accepted through the environment.
# This has to happen before the TTS model is instantiated further down;
# NOTE(review): Coqui TTS is understood to read this variable when fetching
# the model so it does not prompt interactively -- confirm against the
# installed TTS version.
os.environ["COQUI_TOS_AGREED"] = "1"


# Keep a handle on the real loader so the wrapper can delegate to it.
original_torch_load = torch.load


# Wrapper around ``torch.load`` that always passes ``weights_only=False``,
# overriding whatever the caller supplied for that keyword.
#
# NOTE(review): presumably needed because recent PyTorch releases default
# ``weights_only`` to True and the XTTS checkpoint contains pickled objects
# that the restricted unpickler rejects -- confirm against the installed
# torch/TTS versions.
#
# SECURITY: ``weights_only=False`` means full pickle deserialisation, which
# can execute arbitrary code embedded in a checkpoint. Because ``torch.load``
# is replaced below, this applies to every load in the process, not only to
# the TTS model. Only load checkpoints from trusted sources.
@functools.wraps(original_torch_load)
def patched_torch_load(f, *args, **kwargs):
    kwargs["weights_only"] = False
    return original_torch_load(f, *args, **kwargs)


# Install the wrapper globally; every later ``torch.load`` call goes through it.
torch.load = patched_torch_load


# Instantiate the XTTS v2 model once at import time; the request handler
# below reuses this single module-level instance for every call.
# NOTE(review): no device is selected here, so the model stays on whatever
# device TTS picks by default -- consider an explicit `.to(device)` if GPU
# inference is intended.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")


# FastAPI application object; the route below is registered on it via its
# decorator.
app = FastAPI()


@app.post("/generate-audio/")
async def generate_audio(
    text: str = Form(...),
    language: str = Form(...),
    speaker_wav: UploadFile = File(...),
):
    """Synthesize ``text`` with the uploaded reference voice and return a WAV.

    Args:
        text: Text to synthesize (required form field).
        language: Language value forwarded unchanged to ``tts.tts``.
        speaker_wav: Uploaded reference audio; its bytes are written to a
            temporary ``.wav`` file whose path is passed to ``tts.tts``.

    Returns:
        A ``FileResponse`` streaming the generated audio as ``audio/wav``
        with the download name ``output.wav``.
    """
    # Read the upload before creating any file so a failed read leaves
    # nothing behind on disk.
    contents = await speaker_wav.read()

    # delete=False because the model is handed the *path* after the handle
    # is closed; the file is removed explicitly in the ``finally`` below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(contents)
        tmp_path = tmp.name

    try:
        # NOTE: this call is synchronous, so it blocks the event loop for the
        # duration of the synthesis.
        # NOTE(review): ``emotion`` is hard-coded; confirm the XTTS model
        # actually honours it.
        audio = tts.tts(
            text=text,
            speaker_wav=tmp_path,
            language=language,
            split_sentences=True,
            emotion="Angry",
        )
    finally:
        # The reference clip is only needed during synthesis.
        os.remove(tmp_path)

    # mkstemp creates the file atomically, unlike the deprecated, race-prone
    # tempfile.mktemp. Only the path is needed, so close the descriptor.
    out_fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(out_fd)
    try:
        # NOTE(review): 24000 Hz is assumed to match the model's output rate.
        sf.write(out_path, audio, 24000)
    except Exception:
        # Do not leave a partial output file behind if encoding fails.
        os.remove(out_path)
        raise

    # Delete the generated file once the response has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(os.remove, out_path)
    return FileResponse(
        out_path,
        media_type="audio/wav",
        filename="output.wav",
        background=cleanup,
    )