Spaces:

alex16052G
/

chatbot

Paused

App Files Files Community

alex16052G committed on Jan 23, 2025

Commit

34f9233

verified ·

1 Parent(s): 0728541

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -436

app.py CHANGED Viewed

@@ -1,70 +1,26 @@
-# chat_ai.py
-# ruff: noqa: E402
-# Above allows ruff to ignore E402: module level import not at top of file
-import re
-import tempfile
-import os
 import torch
-import click
 import gradio as gr
-import numpy as np
-import soundfile as sf
-import torchaudio
-from cached_path import cached_path
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    WhisperProcessor,
-    WhisperForConditionalGeneration,
-)
-from num2words import num2words
-try:
-    import spaces
-    USING_SPACES = True
-except ImportError:
-    USING_SPACES = False
-def gpu_decorator(func):
-    if USING_SPACES:
-        return spaces.GPU(func)
-    else:
-        return func
-from f5_tts.model import DiT
-from f5_tts.infer.utils_infer import (
-    load_vocoder,
-    load_model,
-    preprocess_ref_audio_text,
-    infer_process,
-    remove_silence_for_generated_wav,
-    save_spectrogram,
-)
-# Cargar el vocoder
-vocoder = load_vocoder()
-# Configuración y carga del modelo F5-TTS
-F5TTS_model_cfg = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
-F5TTS_ema_model = load_model(
-    DiT, F5TTS_model_cfg, str(cached_path("hf://jpgallegoar/F5-Spanish/model_1200000.safetensors"))
-)
 # Variables globales para el modelo de chat
 chat_model_state = None
 chat_tokenizer_state = None
-# Cargar el modelo Whisper para transcripción
-whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-base")
-whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
-whisper_model.eval()
-if torch.cuda.is_available():
-    whisper_model.to("cuda")
-@gpu_decorator
 def generate_response(messages, model, tokenizer):
     """Genera una respuesta usando el modelo de chat"""
     try:
@@ -100,397 +56,68 @@ def generate_response(messages, model, tokenizer):
         # Extraer solo la respuesta del asistente
         response = generated_text[len(prompt):].strip()
-        # Opcional: Cortar la respuesta al primer salto de línea
-        response = response.split("\n")[0]
         return response
     except Exception as e:
-        # Log del error para depuración
         print(f"Error en generate_response: {e}")
         return "Lo siento, ocurrió un error al generar la respuesta."
-def traducir_numero_a_texto(texto):
-    """Convierte números en texto a su representación en palabras en español"""
-    texto_separado = re.sub(r'([A-Za-z])(\d)', r'\1 \2', texto)
-    texto_separado = re.sub(r'(\d)([A-Za-z])', r'\1 \2', texto_separado)
-    def reemplazar_numero(match):
-        numero = match.group()
-        try:
-            return num2words(int(numero), lang='es')
-        except ValueError:
-            return numero
-    texto_traducido = re.sub(r'\b\d+\b', reemplazar_numero, texto_separado)
-    return texto_traducido
-@gpu_decorator
-def infer(
-    ref_audio_orig, ref_text, gen_text, model, remove_silence, cross_fade_duration=0.15, speed=1
-):
-    """Genera el audio sintetizado a partir del texto"""
-    try:
-        ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text)
-        ema_model = F5TTS_ema_model
-        if not gen_text.startswith(" "):
-            gen_text = " " + gen_text
-        if not gen_text.endswith(". "):
-            gen_text += ". "
-        gen_text = gen_text.lower()
-        gen_text = traducir_numero_a_texto(gen_text)
-        final_wave, final_sample_rate, combined_spectrogram = infer_process(
-            ref_audio,
-            ref_text,
-            gen_text,
-            ema_model,
-            vocoder,
-            cross_fade_duration=cross_fade_duration,
-            speed=speed,
-            progress=gr.Progress(),
-        )
-        # Eliminar silencios si está activado
-        if remove_silence:
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
-                sf.write(f.name, final_wave, final_sample_rate)
-                remove_silence_for_generated_wav(f.name)
-                final_wave, _ = torchaudio.load(f.name)
-            final_wave = final_wave.squeeze().cpu().numpy()
-        # Guardar el espectrograma (opcional)
-        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_spectrogram:
-            spectrogram_path = tmp_spectrogram.name
-            save_spectrogram(combined_spectrogram, spectrogram_path)
-        return (final_sample_rate, final_wave), spectrogram_path
-    except Exception as e:
-        # Log del error para depuración
-        print(f"Error en infer: {e}")
-        return None, None
-def load_chat_model_function():
-    """Función para cargar el modelo de chat"""
-    global chat_model_state, chat_tokenizer_state
-    if chat_model_state is None:
-        try:
-            model_name = "Qwen/Qwen2.5-3B-Instruct"
-            chat_model_state = AutoModelForCausalLM.from_pretrained(
-                model_name, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto" if torch.cuda.is_available() else None
-            )
-            chat_tokenizer_state = AutoTokenizer.from_pretrained(model_name)
-            return gr.update(visible=False), gr.update(visible=True)
-        except Exception as e:
-            print(f"Error al cargar el modelo de chat: {e}")
-            return gr.update(value="Error al cargar el modelo de chat."), gr.update(visible=False)
-    else:
-        return gr.update(visible=False), gr.update(visible=True)
-def transcribe_audio(audio_path):
-    """Transcribe el audio usando el modelo Whisper"""
-    try:
-        if not os.path.exists(audio_path):
-            raise FileNotFoundError(f"Archivo de audio no encontrado: {audio_path}")
-        # Cargar el audio
-        audio, rate = torchaudio.load(audio_path)
-        # Resample si es necesario
-        if rate != 16000:
-            resampler = torchaudio.transforms.Resample(orig_freq=rate, new_freq=16000)
-            audio = resampler(audio)
-        # Asegurarse de que el audio tenga una sola dimensión
-        if audio.ndim > 1:
-            audio = torch.mean(audio, dim=0)
-        input_features = whisper_processor(audio.cpu().numpy(), sampling_rate=16000, return_tensors="pt").input_features
-        if torch.cuda.is_available():
-            input_features = input_features.to("cuda")
-        # Generar la transcripción
-        predicted_ids = whisper_model.generate(input_features)
-        transcription = whisper_processor.decode(predicted_ids[0], skip_special_tokens=True)
-        return transcription
-    except Exception as e:
-        print(f"Error en transcribe_audio: {e}")
-        return None
 with gr.Blocks() as app_chat:
-    gr.Markdown(
-        """
-# Chat de Voz
-¡Mantén una conversación con una IA usando tu voz de referencia!
-1. Sube un clip de audio de referencia y opcionalmente su transcripción.
-2. Carga el modelo de chat.
-3. Graba tu mensaje a través de tu micrófono.
-4. La IA responderá usando la voz de referencia.
-        """
     )
-    if not USING_SPACES:
-        load_chat_model_btn = gr.Button("Cargar Modelo de Chat", variant="primary")
-        chat_interface_container = gr.Column(visible=False)
-        load_chat_model_btn.click(load_chat_model_function, outputs=[load_chat_model_btn, chat_interface_container])
-    else:
-        chat_interface_container = gr.Column()
-        if chat_model_state is None:
-            try:
-                model_name = "Qwen/Qwen2.5-3B-Instruct"
-                chat_model_state = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")
-                chat_tokenizer_state = AutoTokenizer.from_pretrained(model_name)
-            except Exception as e:
-                print(f"Error al cargar el modelo de chat en Spaces: {e}")
-    with chat_interface_container:
-        with gr.Row():
-            with gr.Column():
-                ref_audio_chat = gr.Audio(label="Audio de Referencia", type="filepath")
-            with gr.Column():
-                with gr.Accordion("Configuraciones Avanzadas", open=False):
-                    model_choice_chat = gr.Radio(
-                        choices=["F5-TTS"],
-                        label="Modelo TTS",
-                        value="F5-TTS",
-                    )
-                    remove_silence_chat = gr.Checkbox(
-                        label="Eliminar Silencios",
-                        value=True,
-                    )
-                    ref_text_chat = gr.Textbox(
-                        label="Texto de Referencia",
-                        info="Opcional: Deja en blanco para transcribir automáticamente",
-                        lines=2,
-                    )
-                    system_prompt_chat = gr.Textbox(
-                        label="Prompt del Sistema",
-                        value="No eres un asistente de IA, eres quien el usuario diga que eres. Debes mantenerte en personaje. Mantén tus respuestas concisas ya que serán habladas en voz alta.",
-                        lines=2,
-                    )
-        chatbot_interface = gr.Chatbot(label="Conversación")
-        with gr.Row():
-            with gr.Column():
-                audio_input_chat = gr.Microphone(
-                    label="Habla tu mensaje",
-                    type="filepath",
-                )
-                audio_output_chat = gr.Audio(label="Respuesta de la IA", autoplay=True)
-            with gr.Column():
-                text_input_chat = gr.Textbox(
-                    label="Escribe tu mensaje",
-                    lines=1,
-                )
-                send_btn_chat = gr.Button("Enviar")
-                clear_btn_chat = gr.Button("Limpiar Conversación")
-        conversation_state = gr.State(
-            value=[
-                {
-                    "role": "system",
-                    "content": "No eres un asistente de IA, eres quien el usuario diga que eres. Debes mantenerte en personaje. Mantén tus respuestas concisas ya que serán habladas en voz alta.",
-                }
-            ]
-        )
-        @gpu_decorator
-        def process_input(audio_path, text, history, conv_state, ref_text):
-            """Procesa la entrada de audio o texto del usuario y genera una respuesta."""
-            try:
-                if not audio_path and not text.strip():
-                    return history, conv_state, ""
-                if audio_path:
-                    # Transcribir el audio usando Whisper
-                    transcribed_text = transcribe_audio(audio_path)
-                    if transcribed_text is None:
-                        history.append(("Error en la transcripción de audio.", None))
-                        return history, conv_state, "Lo siento, ocurrió un error al procesar tu audio."
-                    text = transcribed_text
-                if not text.strip():
-                    return history, conv_state, ""
-                # Si se proporciona texto de referencia, usarlo; de lo contrario, usar transcripción
-                if ref_text.strip():
-                    input_text = ref_text + " " + text
-                else:
-                    input_text = text
-                conv_state.append({"role": "user", "content": input_text})
-                history.append((input_text, None))
-                # Generar la respuesta del modelo de chat
-                response = generate_response(conv_state, chat_model_state, chat_tokenizer_state)
-                conv_state.append({"role": "assistant", "content": response})
-                history[-1] = (input_text, response)
-                return history, conv_state, response
-            except Exception as e:
-                print(f"Error en process_input: {e}")
-                history.append(("Error al procesar tu solicitud.", None))
-                return history, conv_state, "Lo siento, ocurrió un error al procesar tu solicitud."
-        @gpu_decorator
-        def generate_audio_response(response, ref_audio, ref_text, model, remove_silence):
-            """Genera el audio de respuesta para la IA."""
-            try:
-                if not response or not ref_audio:
-                    return None
-                # Verificar si la respuesta es un mensaje de error
-                if response.startswith("Lo siento"):
-                    # Aquí podrías tener un audio pregrabado de error
-                    # Por ejemplo, "error_audio.wav" en el mismo directorio
-                    error_audio_path = "error_audio.wav"
-                    if os.path.exists(error_audio_path):
-                        return error_audio_path
-                    else:
-                        # Si no tienes un archivo de audio de error, puedes retornar None
-                        # O generar el audio dinámicamente usando infer
-                        audio_result, _ = infer(
-                            ref_audio,
-                            ref_text,
-                            response,
-                            model,
-                            remove_silence,
-                            cross_fade_duration=0.15,
-                            speed=1.0,
-                        )
-                        return audio_result
-                else:
-                    # Generar el audio de la respuesta normal
-                    audio_result, _ = infer(
-                        ref_audio,
-                        ref_text,
-                        response,
-                        model,
-                        remove_silence,
-                        cross_fade_duration=0.15,
-                        speed=1.0,
-                    )
-                    if audio_result is None:
-                        # Retornar un audio de error si infer falla
-                        error_audio_path = "error_audio.wav"
-                        if os.path.exists(error_audio_path):
-                            return error_audio_path
-                        else:
-                            return None
-                    return audio_result
-            except Exception as e:
-                print(f"Error en generate_audio_response: {e}")
-                # Retornar un audio de error si ocurre una excepción
-                error_audio_path = "error_audio.wav"
-                if os.path.exists(error_audio_path):
-                    return error_audio_path
-                else:
-                    return None
-        def clear_conversation():
-            """Reset the conversation"""
-            return [], [
-                {
-                    "role": "system",
-                    "content": "No eres un asistente de IA, eres quien el usuario diga que eres. Debes mantenerte en personaje. Mantén tus respuestas concisas ya que serán habladas en voz alta.",
-                }
-            ]
-        def update_system_prompt(new_prompt):
-            """Update the system prompt and reset the conversation"""
-            new_conv_state = [{"role": "system", "content": new_prompt}]
-            return [], new_conv_state
-        # Manejar la entrada de audio
-        audio_input_chat.change(
-            process_input,
-            inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state, ref_text_chat],
-            outputs=[chatbot_interface, conversation_state, text_input_chat],
-        ).then(
-            generate_audio_response,
-            inputs=[chatbot_interface, ref_audio_chat, ref_text_chat, model_choice_chat, remove_silence_chat],
-            outputs=[audio_output_chat],
-        ).then(
-            lambda: None,
-            None,
-            audio_input_chat,
-        )
-        # Manejar la entrada de texto
-        text_input_chat.submit(
-            process_input,
-            inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state, ref_text_chat],
-            outputs=[chatbot_interface, conversation_state, text_input_chat],
-        ).then(
-            generate_audio_response,
-            inputs=[chatbot_interface, ref_audio_chat, ref_text_chat, model_choice_chat, remove_silence_chat],
-            outputs=[audio_output_chat],
-        ).then(
-            lambda: None,
-            None,
-            text_input_chat,
-        )
-        # Manejar el botón de enviar
-        send_btn_chat.click(
-            process_input,
-            inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state, ref_text_chat],
-            outputs=[chatbot_interface, conversation_state, text_input_chat],
-        ).then(
-            generate_audio_response,
-            inputs=[chatbot_interface, ref_audio_chat, ref_text_chat, model_choice_chat, remove_silence_chat],
-            outputs=[audio_output_chat],
-        ).then(
-            lambda: None,
-            None,
-            text_input_chat,
-        )
-        # Manejar el botón de limpiar conversación
-        clear_btn_chat.click(
-            clear_conversation,
-            outputs=[chatbot_interface, conversation_state],
-        )
-        # Manejar cambios en el prompt del sistema
-        system_prompt_chat.change(
-            update_system_prompt,
-            inputs=system_prompt_chat,
-            outputs=[chatbot_interface, conversation_state],
-        )
-@click.command()
-@click.option("--port", "-p", default=None, type=int, help="Puerto para ejecutar la aplicación")
-@click.option("--host", "-H", default=None, help="Host para ejecutar la aplicación")
-@click.option(
-    "--share",
-    "-s",
-    default=False,
-    is_flag=True,
-    help="Compartir la aplicación a través de un enlace compartido de Gradio",
-)
-@click.option("--api", "-a", default=True, is_flag=True, help="Permitir acceso a la API")
-def main(port, host, share, api):
-    """Función principal para lanzar la aplicación Gradio de Chat AI."""
-    print("Iniciando la aplicación de Chat AI...")
-    app_chat.queue(api_open=api).launch(
-        server_name=host,
-        server_port=port,
-        share=share,
-        show_api=api
     )
-if __name__ == "__main__":
-    if not USING_SPACES:
-        main()
-    else:
-        app_chat.queue().launch(share=True)  # Asegura que 'share=True' si se usa Spaces

 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 # Variables globales para el modelo de chat
 chat_model_state = None
 chat_tokenizer_state = None
+def load_chat_model():
+    """Función para cargar el modelo de chat"""
+    global chat_model_state, chat_tokenizer_state
+    try:
+        model_name = "Qwen/Qwen2.5-3B-Instruct"
+        chat_model_state = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+            device_map="auto" if torch.cuda.is_available() else None
+        )
+        chat_tokenizer_state = AutoTokenizer.from_pretrained(model_name)
+        print("Modelo cargado exitosamente.")
+    except Exception as e:
+        print(f"Error al cargar el modelo de chat: {e}")
 def generate_response(messages, model, tokenizer):
     """Genera una respuesta usando el modelo de chat"""
     try:
         # Extraer solo la respuesta del asistente
         response = generated_text[len(prompt):].strip()
         return response
     except Exception as e:
         print(f"Error en generate_response: {e}")
         return "Lo siento, ocurrió un error al generar la respuesta."
+# Gradio Interface
 with gr.Blocks() as app_chat:
+    gr.Markdown("### Chatbot Simple")
+    chatbot_interface = gr.Chatbot(label="Conversación")
+    text_input_chat = gr.Textbox(label="Escribe tu mensaje", lines=1)
+    send_btn_chat = gr.Button("Enviar")
+    clear_btn_chat = gr.Button("Limpiar Conversación")
+    conversation_state = gr.State(
+        value=[
+            {
+                "role": "system",
+                "content": "Eres un chatbot. Responde a las preguntas del usuario de manera concisa y clara.",
+            }
+        ]
     )
+    def process_input(text, history, conv_state):
+        """Procesa la entrada de texto del usuario y genera una respuesta."""
+        if not text.strip():
+            return history, conv_state, ""
+        conv_state.append({"role": "user", "content": text})
+        history.append((text, None))
+        # Generar la respuesta del modelo de chat
+        response = generate_response(conv_state, chat_model_state, chat_tokenizer_state)
+        conv_state.append({"role": "assistant", "content": response})
+        history[-1] = (text, response)
+        return history, conv_state, ""
+    def clear_conversation():
+        """Resetea la conversación"""
+        return [], [{"role": "system", "content": "Eres un chatbot. Responde a las preguntas del usuario de manera concisa y clara."}]
+    # Manejar entrada de texto y botones
+    text_input_chat.submit(
+        process_input,
+        inputs=[text_input_chat, chatbot_interface, conversation_state],
+        outputs=[chatbot_interface, conversation_state, text_input_chat],
+    )
+    send_btn_chat.click(
+        process_input,
+        inputs=[text_input_chat, chatbot_interface, conversation_state],
+        outputs=[chatbot_interface, conversation_state, text_input_chat],
+    )
+    clear_btn_chat.click(
+        clear_conversation,
+        outputs=[chatbot_interface, conversation_state],
     )
+# Cargar el modelo al iniciar
+load_chat_model()
+# Ejecutar la aplicación
+app_chat.launch()