File size: 1,447 Bytes
1c0a758 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import os
import tempfile

import numpy as np
import soundfile as sf
import torch
from fastapi import BackgroundTasks, FastAPI, File, Form, UploadFile
from fastapi.responses import FileResponse
from TTS.api import TTS
# Force license consent: set the COQUI_TOS_AGREED flag in the process
# environment before any model is loaded.
# NOTE(review): presumably read by the TTS library to skip its interactive
# terms-of-service prompt — confirm against the library version in use.
os.environ.update({"COQUI_TOS_AGREED": "1"})
# Temporary monkey patch of torch.load: keep a handle on the real function,
# then rebind torch.load to a wrapper that always disables weights-only mode.
# NOTE(review): presumably required because the XTTS checkpoint cannot be
# loaded in weights-only mode — confirm, and remove once no longer needed.
original_torch_load = torch.load


def patched_torch_load(f, *args, **kwargs):
    """Delegate to the original ``torch.load`` with ``weights_only=False``.

    Every positional and keyword argument is forwarded unchanged, except
    that ``weights_only`` is overridden to ``False`` even when the caller
    passed a different value.

    SECURITY: with ``weights_only=False`` the checkpoint is fully unpickled,
    which can execute arbitrary code. Only load checkpoints from trusted
    sources while this patch is active.
    """
    forced_kwargs = {**kwargs, "weights_only": False}
    return original_torch_load(f, *args, **forced_kwargs)


torch.load = patched_torch_load
# Load the XTTS model once, at import time; the resulting `tts` object is
# the one used by the /generate-audio/ endpoint below.
_XTTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
tts = TTS(model_name=_XTTS_MODEL_NAME)

# FastAPI application on which the HTTP routes are registered.
app = FastAPI()
def _remove_quietly(path):
    """Delete the file at *path*, ignoring the case where it no longer exists."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


@app.post("/generate-audio/")
async def generate_audio(
    text: str = Form(...),
    language: str = Form(...),
    speaker_wav: UploadFile = File(...),
):
    """Synthesize *text* with the voice of an uploaded reference clip.

    Args:
        text: Text to speak (multipart form field).
        language: Language value forwarded unchanged to ``tts.tts``.
        speaker_wav: Uploaded reference audio. It is written to a temporary
            ``.wav`` file because ``tts.tts`` is given a file path.

    Returns:
        A ``FileResponse`` (``audio/wav``, download name ``output.wav``)
        streaming the generated audio, written at a 24000 Hz sample rate.

    Both temporary files are removed by this handler: the reference clip as
    soon as synthesis finishes (or fails), the generated file after the
    response has been sent.

    NOTE: ``tts.tts`` is called synchronously inside this coroutine, so the
    event loop is blocked for the whole synthesis.
    """
    print("PRIOR WITH")
    # Persist the upload under a unique name. delete=False lets the file
    # outlive the `with` block, so removal is handled explicitly in `finally`.
    speaker_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    speaker_path = speaker_file.name
    try:
        with speaker_file:
            speaker_file.write(await speaker_wav.read())

        print("PRIOR AUDIO")
        # Generate the audio
        audio = tts.tts(
            text=text,
            speaker_wav=speaker_path,
            language=language,
            split_sentences=True,
            emotion="Angry",
        )
    finally:
        # The reference clip is only needed for the call above.
        _remove_quietly(speaker_path)

    print("PRIOR MKTEMP")
    # mkstemp creates the file atomically, unlike the deprecated, race-prone
    # tempfile.mktemp. Only the path is needed, so close the descriptor.
    out_fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(out_fd)
    try:
        sf.write(out_path, audio, 24000)
    except Exception:
        # Do not leave a partial output file behind when writing fails.
        _remove_quietly(out_path)
        raise

    print("PRIOR RETURN")
    # Delete the generated file once the response body has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(_remove_quietly, out_path)
    return FileResponse(
        out_path,
        media_type="audio/wav",
        filename="output.wav",
        background=cleanup,
    )
|