# test3 / app.py
# XSify's picture
# Update app.py
# 1c0a758 verified
import functools
import os
import tempfile

import numpy as np
import soundfile as sf
import torch
from fastapi import BackgroundTasks, FastAPI, File, Form, UploadFile
from fastapi.responses import FileResponse
from TTS.api import TTS
# Force license consent: mark the Coqui terms of service as agreed via the
# process environment.
os.environ.update(COQUI_TOS_AGREED="1")
# Temporary monkey patch of torch.load: every call is forced to run with
# weights_only=False.
# SECURITY NOTE(review): with weights_only=False a checkpoint can execute
# arbitrary code while it is unpickled, and this patch stays installed for
# every later torch.load call in the process. Only load checkpoints from
# trusted sources.
original_torch_load = torch.load


@functools.wraps(original_torch_load)
def patched_torch_load(f, *args, **kwargs):
    """Call the original ``torch.load`` with ``weights_only`` forced to False.

    ``functools.wraps`` keeps the original name, docstring and signature
    visible, so code that inspects ``torch.load`` (for example to check
    whether it accepts ``weights_only``) still sees the real function.

    Args:
        f: Forwarded unchanged as the first argument of ``torch.load``.
        *args: Extra positional arguments, forwarded unchanged.
        **kwargs: Keyword arguments, forwarded after ``weights_only`` has
            been overwritten with ``False``; a caller-supplied value for it
            is discarded.

    Returns:
        Whatever the original ``torch.load`` returns.
    """
    kwargs["weights_only"] = False
    return original_torch_load(f, *args, **kwargs)


torch.load = patched_torch_load
# Create the FastAPI application and load the XTTS model once, at import time,
# so every request reuses the same ``tts`` instance.
app = FastAPI()
tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/xtts_v2",
)
def _discard_file(path):
    """Delete the temporary file at ``path`` on a best-effort basis."""
    try:
        os.remove(path)
    except OSError as exc:
        # Cleanup must never turn a successful synthesis into an error.
        print(f"Could not remove temporary file {path}: {exc}")


@app.post("/generate-audio/")
async def generate_audio(
    text: str = Form(...),
    language: str = Form(...),
    speaker_wav: UploadFile = File(...),
):
    """Synthesize ``text`` with the module-level ``tts`` model and return a WAV.

    Args:
        text: Form field with the text to synthesize.
        language: Form field passed to the model as ``language``.
        speaker_wav: Uploaded audio file; it is written to a temporary
            ``.wav`` file whose path is passed to the model as
            ``speaker_wav``.

    Returns:
        A ``FileResponse`` (``audio/wav``, download name ``output.wav``)
        serving the generated audio. The file behind it is deleted after the
        response has been sent.

    Raises:
        Any exception raised by the model or by the audio writer propagates
        to the framework; both temporary files are removed in that case.
    """
    print("PRIOR WITH")
    # Save the upload to disk: the model call below receives a file path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(await speaker_wav.read())
        tmp_path = tmp.name

    try:
        print("PRIOR AUDIO")
        # NOTE(review): this call is synchronous inside an async endpoint, so
        # the event loop is presumably blocked while audio is generated.
        audio = tts.tts(
            text=text,
            speaker_wav=tmp_path,
            language=language,
            split_sentences=True,
            emotion="Angry",
        )
    finally:
        # The reference clip is no longer needed once synthesis has finished
        # or failed; previously it was left on disk after every request.
        _discard_file(tmp_path)

    print("PRIOR MKTEMP")
    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name, which another process could claim before it is written.
    fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    sample_rate = 24000  # presumably the model's output rate -- TODO confirm
    try:
        sf.write(out_path, audio, sample_rate)
    except Exception:
        _discard_file(out_path)
        raise

    print("PRIOR RETURN")
    # Remove the generated file only after the response body has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(_discard_file, out_path)
    return FileResponse(
        out_path,
        media_type="audio/wav",
        filename="output.wav",
        background=cleanup,
    )