| """
|
| Espacio de Hugging Face: Text-to-Speech (MMS-TTS)
|
| ==================================================
|
| Modelo: facebook/mms-tts-spa
|
|
|
| Entrada: Texto en español
|
| Salida: Audio WAV
|
| """
|
|
|
| import gradio as gr
|
| import numpy as np
|
| import tempfile
|
| import os
|
| from transformers import pipeline
|
|
|
|
|
class TTSService:
    """Text-to-speech service built on the ``facebook/mms-tts-spa`` model.

    The ``transformers`` pipeline is created once in the constructor and
    reused by every call to :meth:`synthesize`.
    """

    def __init__(self):
        """Create the text-to-speech pipeline and keep it on the instance."""
        print("Cargando modelo TTS (facebook/mms-tts-spa)...")
        self.synthesizer = pipeline(
            "text-to-speech",
            model="facebook/mms-tts-spa",
            device=-1,  # -1 selects the CPU in the transformers pipeline API
        )
        print("Modelo TTS cargado correctamente.")

    def synthesize(self, text: str) -> str:
        """Generate speech for ``text`` and return the path of a WAV file.

        The pipeline result (or its first element, when a list is returned)
        must provide ``"audio"`` and ``"sampling_rate"`` entries. The audio is
        converted to 16-bit PCM and written to a temporary ``.wav`` file that
        is NOT deleted automatically; the caller owns the returned path.

        Args:
            text: Text to convert to speech.

        Returns:
            Filesystem path of the generated WAV file.

        Raises:
            Exception: Whatever the pipeline or the WAV writer raises. If the
                write fails, the temporary file is removed before re-raising.
        """
        # Kept as a function-level import, as in the original code.
        import scipy.io.wavfile

        output = self.synthesizer(text)
        if isinstance(output, list):
            output = output[0]

        waveform = np.asarray(output["audio"])
        sampling_rate = int(output["sampling_rate"])

        if waveform.ndim == 2:
            # A short first axis is presumably (channels, samples); transpose
            # so that samples run along axis 0, which is what the WAV writer
            # expects for multi-channel data.
            if waveform.shape[0] <= 8 and waveform.shape[1] > waveform.shape[0]:
                waveform = waveform.T
            # A single channel is stored as plain 1-D (mono) audio.
            if waveform.shape[1] == 1:
                waveform = waveform[:, 0]

        if waveform.dtype.kind == "f":
            # Float audio: clamp to [-1, 1] and scale to the int16 range.
            waveform = np.clip(waveform, -1.0, 1.0)
            waveform_int16 = (waveform * 32767).astype(np.int16)
        else:
            waveform_int16 = waveform.astype(np.int16)

        # Reserve a unique path and close the handle right away: leaving it
        # open leaks a file descriptor on every request and prevents the file
        # from being reopened by name on Windows.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            wav_path = temp_file.name

        try:
            scipy.io.wavfile.write(wav_path, sampling_rate, waveform_int16)
        except Exception:
            # Do not leave an orphaned empty file behind when encoding fails.
            if os.path.exists(wav_path):
                os.remove(wav_path)
            raise

        return wav_path
|
|
|
|
|
|
|
# Build the shared service once at import time so that every request handled
# by the Gradio callback reuses the same loaded pipeline.
print("Inicializando servicio TTS...")
service = TTSService()
print("Servicio listo.")
|
|
|
|
|
def texto_a_voz(texto: str) -> str:
    """Gradio callback: convert the submitted text into a WAV file path.

    Returns ``None`` when ``texto`` is empty or whitespace-only, and also
    when synthesis raises; in that case the error is printed to stdout.
    Otherwise returns the path produced by ``service.synthesize``.
    """
    has_content = bool(texto) and bool(texto.strip())
    if not has_content:
        return None

    try:
        return service.synthesize(texto)
    except Exception as e:
        # Best-effort: log the failure and hand Gradio an empty result.
        print(f"Error: {e}")
    return None
|
|
|
|
|
|
|
# Input widget: a multi-line textbox for the text to be spoken.
_text_input = gr.Textbox(
    lines=5,
    placeholder="Escribe el texto que quieres escuchar...",
    label="Texto",
)

# Output widget: configured with type="filepath", matching the file path that
# the callback returns.
_audio_output = gr.Audio(label="Audio Generado", type="filepath")

# Sample prompts offered in the UI; one single-field row per example.
_example_prompts = [
    ["Hola, esto es una prueba del sistema de texto a voz."],
    ["La inteligencia artificial está revolucionando el mundo."],
]

iface = gr.Interface(
    fn=texto_a_voz,
    inputs=_text_input,
    outputs=_audio_output,
    title="🔊 Text-to-Speech (MMS-TTS)",
    description="Convierte texto en español a voz usando el modelo MMS-TTS de Facebook.",
    examples=_example_prompts,
    flagging_mode="never",
)


# Start the web app only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|
|
|