import functools
import os

import gradio as gr
import numpy as np
import torch

# Mark the Coqui terms of service as accepted before the TTS model is
# constructed further down.
# NOTE(review): presumably this is what lets the model download run without
# an interactive licence prompt -- confirm against the TTS model manager.
os.environ["COQUI_TOS_AGREED"] = "1"

# Keep a handle on the real loader so the wrapper below can delegate to it.
original_torch_load = torch.load


@functools.wraps(original_torch_load)
def patched_torch_load(f, *args, **kwargs):
    """Call the real ``torch.load`` with ``weights_only`` forced to False.

    Any ``weights_only`` value supplied by the caller is overridden; every
    other positional and keyword argument is forwarded untouched.

    NOTE(review): presumably needed because recent PyTorch releases default
    to ``weights_only=True`` and the XTTS checkpoint contains pickled objects
    that mode rejects -- confirm against the installed versions.

    SECURITY: ``weights_only=False`` means full unpickling, which can execute
    arbitrary code embedded in a checkpoint. Because ``torch.load`` is
    replaced process-wide, only load checkpoints from trusted sources.

    Args:
        f: File path or file-like object, passed straight to ``torch.load``.
        *args: Extra positional arguments for ``torch.load``.
        **kwargs: Extra keyword arguments for ``torch.load``.

    Returns:
        Whatever the real ``torch.load`` returns for the given input.
    """
    kwargs["weights_only"] = False
    return original_torch_load(f, *args, **kwargs)


# Install the wrapper globally so code that calls ``torch.load`` after this
# point (including third-party libraries) goes through it.
torch.load = patched_torch_load

# Deliberately imported mid-file: this must run after COQUI_TOS_AGREED is set
# and after torch.load has been replaced above, so do not hoist it to the top.
from TTS.api import TTS  # noqa: E402

# Single shared XTTS v2 model instance, constructed once at import time and
# reused by every generate_audio call.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")

def generate_audio(text, language, speaker_wav):
    """Synthesize ``text`` using the voice from a reference recording.

    Args:
        text: The text to speak.
        language: Language code forwarded to the model (e.g. ``"es"``,
            ``"en"``).
        speaker_wav: File path of the reference voice recording, or ``None``
            when the user did not supply one.

    Returns:
        A ``(sample_rate, samples)`` tuple for Gradio's audio output, where
        ``samples`` is a float32 NumPy array.

    Raises:
        gr.Error: If no reference recording or no text was provided; Gradio
            shows the message to the user.
    """
    # The tts call below needs a reference recording; report a readable
    # message instead of silently producing no audio.
    if speaker_wav is None:
        raise gr.Error("Sube un audio de voz de referencia.")
    if not text or not text.strip():
        raise gr.Error("Escribe el texto que quieres sintetizar.")

    # NOTE(review): 24 kHz is assumed to be the model's output rate --
    # confirm against the loaded model's configuration.
    sample_rate = 24000

    audio = tts.tts(text=text, speaker_wav=speaker_wav, language=language)

    # float32 rather than float16: half precision has an 11-bit significand,
    # which is coarser than 16-bit PCM and cannot represent 32767 exactly, so
    # peaks can overflow when the samples are later scaled to int16.
    audio_np = np.asarray(audio, dtype=np.float32)
    return (sample_rate, audio_np)

# Web UI: two text boxes (text to speak, language code) plus a reference
# voice recording handed to generate_audio as a file path; the result is
# rendered with Gradio's audio output component.
iface = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Text(label="Texto"),
        gr.Text(label="Idioma (ej: 'es', 'en')"),
        gr.Audio(type="filepath", label="Audio de voz"),
    ],
    outputs="audio",
)

# Start the Gradio server when the script is executed.
iface.launch()