# File: tts / app.py (Hugging Face Space, author: igna7)
# Commit a101568 (verified): "add tts files"
"""
Espacio de Hugging Face: Text-to-Speech (MMS-TTS)
==================================================
Modelo: facebook/mms-tts-spa
Entrada: Texto en español
Salida: Audio WAV
"""
import gradio as gr
import numpy as np
import tempfile
import os
from transformers import pipeline
class TTSService:
    """Text-to-speech service wrapping the ``facebook/mms-tts-spa`` pipeline.

    The pipeline is created once in ``__init__`` and reused by every call to
    ``synthesize``.
    """

    def __init__(self):
        """Create the ``text-to-speech`` pipeline on CPU (``device=-1``)."""
        print("Cargando modelo TTS (facebook/mms-tts-spa)...")
        self.synthesizer = pipeline(
            "text-to-speech",
            model="facebook/mms-tts-spa",
            device=-1,  # CPU
        )
        print("Modelo TTS cargado correctamente.")

    def synthesize(self, text: str) -> str:
        """Generate audio for *text* and return the path of a 16-bit WAV file.

        Args:
            text: Text handed unchanged to the underlying pipeline.

        Returns:
            Path of a newly created temporary ``.wav`` file. The file is
            created with ``delete=False``, so it is never removed here;
            the caller owns it once this method returns.

        Raises:
            Exception: Whatever the pipeline or ``scipy.io.wavfile.write``
                raises. If writing fails, the temporary file is removed
                before the exception propagates.
        """
        # Local import, as in the original: scipy is only needed here.
        import scipy.io.wavfile

        output = self.synthesizer(text)
        # The result may be a single dict or a list of dicts; use the first.
        if isinstance(output, list):
            output = output[0]
        waveform = np.asarray(output["audio"])
        sampling_rate = int(output["sampling_rate"])

        # Normalize the audio shape.
        if waveform.ndim == 2:
            # Heuristic: a short first axis (<= 8) that is smaller than the
            # second is presumably (channels, samples); transpose it so that
            # samples come first.
            if waveform.shape[0] <= 8 and waveform.shape[1] > waveform.shape[0]:
                waveform = waveform.T
            # A single column is flattened to a 1-D array.
            if waveform.shape[1] == 1:
                waveform = waveform[:, 0]

        # Convert to int16.
        if waveform.dtype.kind == "f":
            # Float audio is clipped to [-1.0, 1.0] and scaled to int16 range.
            waveform = np.clip(waveform, -1.0, 1.0)
            waveform_int16 = (waveform * 32767).astype(np.int16)
        else:
            waveform_int16 = waveform.astype(np.int16)

        # Reserve a temporary .wav path. The handle is closed right away so
        # no file descriptor leaks per request and scipy can reopen the path
        # by name (an open handle blocks that on some platforms).
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
            wav_path = handle.name
        try:
            scipy.io.wavfile.write(wav_path, sampling_rate, waveform_int16)
        except Exception:
            # Do not leave an empty or partial file behind on failure.
            try:
                os.remove(wav_path)
            except OSError:
                pass
            raise
        return wav_path
# Initialize the service once at import time. Constructing TTSService creates
# the text-to-speech pipeline, and the module-level `service` instance is the
# one used by the Gradio callback below.
print("Inicializando servicio TTS...")
service = TTSService()
print("Servicio listo.")
def texto_a_voz(texto: str) -> str:
    """Gradio callback: synthesize *texto* and return the audio file path.

    Returns ``None`` when the input is empty or whitespace-only, and also
    when synthesis raises; in that case the error is printed, not re-raised.
    """
    if texto and texto.strip():
        try:
            return service.synthesize(texto)
        except Exception as exc:
            print(f"Error: {exc}")
    return None
# Gradio interface: one multi-line text input, one audio output served from
# the file path returned by the callback.
_text_input = gr.Textbox(
    lines=5,
    placeholder="Escribe el texto que quieres escuchar...",
    label="Texto",
)
_audio_output = gr.Audio(label="Audio Generado", type="filepath")
_example_inputs = [
    ["Hola, esto es una prueba del sistema de texto a voz."],
    ["La inteligencia artificial está revolucionando el mundo."],
]

iface = gr.Interface(
    fn=texto_a_voz,
    inputs=_text_input,
    outputs=_audio_output,
    title="🔊 Text-to-Speech (MMS-TTS)",
    description="Convierte texto en español a voz usando el modelo MMS-TTS de Facebook.",
    examples=_example_inputs,
    flagging_mode="never",
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()