Spaces:

Andro0s
/

Lector

Sleeping

App Files Community

Andro0s committed on Mar 13

Commit

9d14874

verified ·

1 Parent(s): a67fbaa

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -113

app.py CHANGED Viewed

@@ -1,136 +1,63 @@
 import gradio as gr
-from TTS.api import TTS
 import torch
-import os
-# Configurar device
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# Cargar modelo de español (se descarga automáticamente la primera vez)
-print("🔄 Cargando modelo de TTS en español...")
-tts = TTS("tts_models/es/css10/vits").to(device)
-print("✅ Modelo cargado correctamente")
-def text_to_speech(text, speed=1.0):
-    """
-    Convierte texto a voz en español latino
-    """
     if not text or not text.strip():
-        return None, "⚠️ Por favor ingresa algún texto"
-    # Limitar longitud para evitar tiempos muy largos
-    if len(text) > 500:
-        return None, "⚠️ El texto es muy largo. Máximo 500 caracteres."
     try:
-        output_path = "output.wav"
-        # Generar audio
-        tts.tts_to_file(
-            text=text,
-            file_path=output_path,
-            speed=speed
         )
-        return output_path, "✅ ¡Audio generado con éxito!"
     except Exception as e:
         return None, f"❌ Error: {str(e)}"
-# CSS personalizado para mejorar la apariencia
-custom_css = """
-.gradio-container {
-    max-width: 800px !important;
-    margin: 0 auto;
-}
-.title {
-    text-align: center;
-    color: #2c3e50;
-}
-"""
-# Crear la interfaz
-with gr.Blocks(css=custom_css, title="🎙️ TTS Español Latino") as demo:
-    gr.Markdown("""
-    <div class="title">
-    <h1>🎙️ Lector de Texto a Voz</h1>
-    <h3>Español Latino - Rápido & Gratis</h3>
-    </div>
-    Escribe cualquier texto y escúchalo en voz alta en segundos.
-    """)
     with gr.Row():
-        with gr.Column(scale=2):
-            text_input = gr.Textbox(
-                label="📝 Texto a convertir",
-                placeholder="Escribe o pega aquí tu texto en español...",
-                lines=4,
-                max_lines=6
-            )
-            with gr.Row():
-                speed_slider = gr.Slider(
-                    minimum=0.5,
-                    maximum=1.5,
-                    value=1.0,
-                    step=0.1,
-                    label="⚡ Velocidad de voz"
-                )
-            generate_btn = gr.Button(
-                "🔊 Generar Voz",
-                variant="primary",
-                size="lg"
-            )
-            status_text = gr.Textbox(
-                label="Estado",
-                interactive=False,
-                value="Listo para generar audio"
-            )
-        with gr.Column(scale=1):
-            audio_output = gr.Audio(
-                label="🔊 Tu Audio",
-                type="filepath",
-                autoplay=False
-            )
-            gr.Markdown("""
-            ### 💡 Consejos:
-            - **Máximo**: 500 caracteres
-            - Usa puntuación para pausas naturales
-            - Ajusta la velocidad si es necesario
-            - Funciona mejor con oraciones completas
-            """)
-    # Ejemplos rápidos
-    gr.Examples(
-        examples=[
-            ["Hola, ¿cómo estás? Bienvenido a este lector de texto a voz."],
-            ["El sol brilla intensamente sobre las playas de México."],
-            ["La tecnología nos permite crear herramientas increíbles cada día."],
-            ["¿Podrías repetir eso, por favor? No lo escuché bien."]
-        ],
-        inputs=text_input,
-        label="🎯 Probar con ejemplos"
-    )
-    # Eventos
-    generate_btn.click(
-        fn=text_to_speech,
-        inputs=[text_input, speed_slider],
-        outputs=[audio_output, status_text]
-    )
-    # Limpiar estado al escribir
-    text_input.change(
-        fn=lambda: ("", "⌨️ Escribiendo..."),
-        outputs=[audio_output, status_text]
-    )
-# Lanzar
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
+from transformers import pipeline
+from datasets import load_dataset
 import torch
+import soundfile as sf
+import numpy as np
+# Cargar modelo de TTS de Microsoft (pequeño y rápido)
 device = "cuda" if torch.cuda.is_available() else "cpu"
+print("🔄 Cargando modelo TTS...")
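+# NOTE: microsoft/speecht5_tts is an English-trained checkpoint (fine-tuned on LibriTTS), not a Spanish-specific model; Spanish text may be mispronounced.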
+synthesizer = pipeline("text-to-speech", "microsoft/speecht5_tts", device=device)
+# Cargar embeddings de speaker para variedad
+embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
+speaker_embeddings = {
+    "hombre_1": torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0),
+    "mujer_1": torch.tensor(embeddings_dataset[7300]["xvector"]).unsqueeze(0),
+}
+def text_to_speech(text, speaker="hombre_1"):
     if not text or not text.strip():
+        return None, "⚠️ Ingresa texto"
+    if len(text) > 200:
+        return None, "⚠️ Máximo 200 caracteres"
     try:
+        # Generar
+        speech = synthesizer(
+            text,
+            forward_params={"speaker_embeddings": speaker_embeddings[speaker]}
         )
+        # Guardar
+        output_path = "output.wav"
+        sf.write(output_path, speech["audio"], samplerate=speech["sampling_rate"])
+        return output_path, "✅ ¡Listo!"
     except Exception as e:
         return None, f"❌ Error: {str(e)}"
+with gr.Blocks(title="🎙️ TTS Local Español") as demo:
+    gr.Markdown("# 🎙️ Texto a Voz - Modelo Local")
     with gr.Row():
+        text_input = gr.Textbox(label="Texto", lines=3)
+        speaker_select = gr.Dropdown(
+            choices=["hombre_1", "mujer_1"],
+            value="hombre_1",
+            label="Voz"
+        )
+    btn = gr.Button("Generar", variant="primary")
+    audio = gr.Audio(label="Audio")
+    status = gr.Textbox(label="Estado")
+    btn.click(fn=text_to_speech, inputs=[text_input, speaker_select], outputs=[audio, status])
 if __name__ == "__main__":
     demo.launch()