|
|
import functools
import os
import tempfile

import numpy as np
import soundfile as sf
import torch
from fastapi import BackgroundTasks, FastAPI, File, Form, UploadFile
from fastapi.responses import FileResponse
from TTS.api import TTS
|
|
|
|
|
|
|
|
# Set the Coqui terms-of-service flag in the process environment before the
# model is constructed further down in this module.
os.environ["COQUI_TOS_AGREED"] = "1"
|
|
|
|
|
|
|
|
# Keep a handle on the real loader so the wrapper below can delegate to it.
original_torch_load = torch.load


@functools.wraps(original_torch_load)
def patched_torch_load(f, *args, **kwargs):
    """Call the original ``torch.load`` with ``weights_only`` forced to False.

    Any ``weights_only`` value supplied by the caller is overridden; every
    other positional and keyword argument is forwarded unchanged.

    SECURITY: with ``weights_only=False`` the checkpoint is unpickled without
    restrictions, which can execute arbitrary code. Only load checkpoint files
    from sources you trust.

    Args:
        f: File-like object or path, passed straight through to ``torch.load``.
        *args: Extra positional arguments for ``torch.load``.
        **kwargs: Extra keyword arguments for ``torch.load``.

    Returns:
        Whatever the original ``torch.load`` returns for the given input.
    """
    kwargs["weights_only"] = False
    return original_torch_load(f, *args, **kwargs)


# Install the wrapper process-wide: every later ``torch.load`` call goes
# through ``patched_torch_load``.
torch.load = patched_torch_load
|
|
|
|
|
|
|
|
# Build the XTTS v2 model once at import time; the request handler below
# reuses this single module-level instance for every call.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")

# ASGI application object that the route decorators register against.
app = FastAPI()
|
|
|
|
|
def _remove_file(path):
    """Delete ``path`` if it exists; a file that is already gone is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


@app.post("/generate-audio/")
async def generate_audio(
    text: str = Form(...),
    language: str = Form(...),
    speaker_wav: UploadFile = File(...),
):
    """Synthesize ``text`` using the uploaded clip as the speaker reference.

    The upload is written to a temporary ``.wav`` file because the TTS call
    is given a file path. The synthesized audio is written to a second
    temporary ``.wav`` file, which is returned as the response body. Both
    temporary files are cleaned up: the reference clip as soon as synthesis
    finishes, the output file after the response has been sent.

    Args:
        text: Form field with the text to speak.
        language: Form field passed to the model as ``language``.
        speaker_wav: Uploaded audio file used as the speaker reference.

    Returns:
        A ``FileResponse`` with media type ``audio/wav`` and download name
        ``output.wav``.

    Raises:
        Any exception raised by the TTS call or by writing the output file
        propagates to the caller after the temporary files are removed.
    """
    print("PRIOR WITH")

    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        contents = await speaker_wav.read()
        tmp.write(contents)
        tmp_path = tmp.name

    try:
        print("PRIOR AUDIO")

        # NOTE(review): this is a synchronous call inside an async handler, so
        # it presumably blocks the event loop while synthesizing — consider
        # offloading it if concurrent requests matter.
        # NOTE(review): emotion is hard-coded; confirm the loaded model
        # actually honors this argument.
        audio = tts.tts(
            text=text,
            speaker_wav=tmp_path,
            language=language,
            split_sentences=True,
            emotion="Angry",
        )
    finally:
        # The reference clip is only needed during synthesis; without this
        # every request would leave a file behind (it was created with
        # delete=False).
        _remove_file(tmp_path)

    print("PRIOR MKTEMP")

    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name, leaving a window in which another process could claim it.
    fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        # 24000 is presumably the model's output sample rate — TODO confirm.
        sf.write(out_path, audio, 24000)
    except Exception:
        _remove_file(out_path)
        raise

    print("PRIOR RETURN")

    # Delete the output file once the response has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(_remove_file, out_path)
    return FileResponse(
        out_path,
        media_type="audio/wav",
        filename="output.wav",
        background=cleanup,
    )
|
|
|