test2 / app.py
XSify's picture
Upload 2 files
97a0e01 verified
import functools
import os
import tempfile

import numpy as np
import soundfile as sf
import torch
from fastapi import BackgroundTasks, FastAPI, File, Form, UploadFile
from fastapi.responses import FileResponse
from TTS.api import TTS
# Force license consent: flag the Coqui terms of service as agreed through the
# process environment (presumably read by the TTS library when the model is
# loaded -- confirm against the library version in use).
os.environ.update({"COQUI_TOS_AGREED": "1"})
# Temporary monkey patch of torch.load: make full (pickle-based) checkpoint
# loading the default for every caller in this process.
# NOTE(review): presumably needed because the XTTS checkpoint cannot be read by
# the weights-only loader -- confirm, and drop the patch once it is not needed.
original_torch_load = torch.load


@functools.wraps(original_torch_load)
def patched_torch_load(f, *args, **kwargs):
    """Call the real ``torch.load`` with ``weights_only`` defaulting to False.

    Args:
        f: File-like object or path, forwarded unchanged.
        *args: Extra positional arguments, forwarded unchanged.
        **kwargs: Extra keyword arguments, forwarded unchanged. An explicit
            ``weights_only`` supplied by the caller is respected.

    Returns:
        Whatever the original ``torch.load`` returns.
    """
    # SECURITY: weights_only=False unpickles arbitrary objects, which can run
    # code embedded in the file. Only load checkpoints from trusted sources.
    # setdefault (rather than plain assignment) keeps a caller's explicit
    # weights_only=True from being silently downgraded.
    kwargs.setdefault("weights_only", False)
    return original_torch_load(f, *args, **kwargs)


torch.load = patched_torch_load
# Load the XTTS model once, at import time; the request handler below reuses
# this single module-level instance for every call.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")
# FastAPI application instance on which the routes are registered (@app.post).
app = FastAPI()
@app.post("/generate-audio/")
async def generate_audio(
    text: str = Form(...),
    language: str = Form(...),
    speaker_wav: UploadFile = File(...),
):
    """Synthesize ``text`` using the uploaded clip as the speaker reference.

    Args:
        text: Text to synthesize (multipart form field).
        language: Language value passed straight through to ``tts.tts``.
        speaker_wav: Uploaded reference audio; it is written to a temporary
            ``.wav`` file because ``tts.tts`` is given a file path.

    Returns:
        A ``FileResponse`` (``audio/wav``, download name ``output.wav``)
        streaming the generated audio. Both temporary files are deleted:
        the reference clip right after synthesis, the output after the
        response has been sent.

    Raises:
        Any exception raised by ``tts.tts`` or ``sf.write`` propagates after
        the temporary files have been cleaned up.
    """
    # Save the upload to disk temporarily.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(await speaker_wav.read())
        tmp_path = tmp.name

    # Generate the audio.
    # NOTE(review): this call runs synchronously inside the coroutine, so a
    # slow synthesis presumably stalls other requests; consider a worker
    # thread once the model's thread-safety has been confirmed.
    try:
        audio = tts.tts(
            text=text,
            speaker_wav=tmp_path,
            language=language,
            split_sentences=True,
            # NOTE(review): emotion is hard-coded; confirm the loaded model
            # actually honours it.
            emotion="Angry",
        )
    finally:
        # The reference clip is only needed while synthesizing.
        os.remove(tmp_path)

    # Save the output. mkstemp creates the file atomically, unlike the
    # deprecated, race-prone tempfile.mktemp.
    fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        # 24000 Hz is hard-coded; presumably the model's output rate -- confirm.
        sf.write(out_path, audio, 24000)
    except Exception:
        os.remove(out_path)
        raise

    # Remove the generated file once the response body has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(os.remove, out_path)
    return FileResponse(
        out_path,
        media_type="audio/wav",
        filename="output.wav",
        background=cleanup,
    )