Spaces:
Paused
Paused
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import gradio as gr | |
| # Variables globales para el modelo de chat | |
| chat_model_state = None | |
| chat_tokenizer_state = None | |
| # Inicialización de ZeroGPU (opcional) | |
| def initialize_zero_gpu(): | |
| """Inicializa ZeroGPU si es requerido por el entorno.""" | |
| try: | |
| import spaces | |
| spaces.GPU(lambda x: x) # Realiza una inicialización dummy | |
| print("ZeroGPU inicializado correctamente.") | |
| except ImportError: | |
| print("ZeroGPU no está disponible o no es necesario en este entorno.") | |
| # Llamamos a la inicialización de ZeroGPU al inicio | |
| initialize_zero_gpu() | |
| def load_chat_model(): | |
| """Función para cargar el modelo de chat.""" | |
| global chat_model_state, chat_tokenizer_state | |
| try: | |
| model_name = "Qwen/Qwen2.5-3B-Instruct" | |
| # Cargar el modelo en CPU o GPU según disponibilidad | |
| chat_model_state = AutoModelForCausalLM.from_pretrained( | |
| model_name, | |
| torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, | |
| device_map="auto" if torch.cuda.is_available() else None | |
| ) | |
| chat_tokenizer_state = AutoTokenizer.from_pretrained(model_name) | |
| print("Modelo cargado exitosamente.") | |
| except Exception as e: | |
| print(f"Error al cargar el modelo de chat: {e}") | |
| def generate_response(messages, model, tokenizer): | |
| """Genera una respuesta usando el modelo de chat.""" | |
| try: | |
| if model is None or tokenizer is None: | |
| raise ValueError("El modelo de chat o el tokenizer no están cargados.") | |
| # Construir el prompt manualmente a partir del historial de mensajes | |
| prompt = "" | |
| for message in messages: | |
| role = message.get("role", "") | |
| content = message.get("content", "") | |
| if role == "system": | |
| prompt += f"System: {content}\n" | |
| elif role == "user": | |
| prompt += f"User: {content}\n" | |
| elif role == "assistant": | |
| prompt += f"Assistant: {content}\n" | |
| prompt += "Assistant:" | |
| # Tokenizar el prompt | |
| model_inputs = tokenizer(prompt, return_tensors="pt").to(model.device) | |
| generated_ids = model.generate( | |
| **model_inputs, | |
| max_new_tokens=512, | |
| temperature=0.7, | |
| top_p=0.95, | |
| eos_token_id=tokenizer.eos_token_id, | |
| ) | |
| # Decodificar la respuesta generada | |
| generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True) | |
| # Extraer solo la respuesta del asistente | |
| response = generated_text[len(prompt):].strip() | |
| return response | |
| except Exception as e: | |
| print(f"Error en generate_response: {e}") | |
| return "Lo siento, ocurrió un error al generar la respuesta." | |
| # Gradio Interface | |
| with gr.Blocks() as app_chat: | |
| gr.Markdown("### Chatbot Simple (CPU/GPU Compatible)") | |
| chatbot_interface = gr.Chatbot(label="Conversación") | |
| text_input_chat = gr.Textbox(label="Escribe tu mensaje", lines=1) | |
| send_btn_chat = gr.Button("Enviar") | |
| clear_btn_chat = gr.Button("Limpiar Conversación") | |
| conversation_state = gr.State( | |
| value=[{"role": "system", "content": "Eres un chatbot. Responde a las preguntas del usuario de manera concisa y clara."}] | |
| ) | |
| def process_input(text, history, conv_state): | |
| """Procesa la entrada de texto del usuario y genera una respuesta.""" | |
| if not text.strip(): | |
| return history, conv_state, "" | |
| conv_state.append({"role": "user", "content": text}) | |
| history.append((text, None)) | |
| # Generar la respuesta del modelo de chat | |
| response = generate_response(conv_state, chat_model_state, chat_tokenizer_state) | |
| conv_state.append({"role": "assistant", "content": response}) | |
| history[-1] = (text, response) | |
| return history, conv_state, "" | |
| def clear_conversation(): | |
| """Resetea la conversación""" | |
| return [], [{"role": "system", "content": "Eres un chatbot. Responde a las preguntas del usuario de manera concisa y clara."}] | |
| # Manejar entrada de texto y botones | |
| text_input_chat.submit( | |
| process_input, | |
| inputs=[text_input_chat, chatbot_interface, conversation_state], | |
| outputs=[chatbot_interface, conversation_state, text_input_chat], | |
| ) | |
| send_btn_chat.click( | |
| process_input, | |
| inputs=[text_input_chat, chatbot_interface, conversation_state], | |
| outputs=[chatbot_interface, conversation_state, text_input_chat], | |
| ) | |
| clear_btn_chat.click( | |
| clear_conversation, | |
| outputs=[chatbot_interface, conversation_state], | |
| ) | |
| # Cargar el modelo al iniciar | |
| load_chat_model() | |
| # Ejecutar la aplicación con configuración segura | |
| app_chat.launch( | |
| server_name="0.0.0.0", # Permite acceso local | |
| server_port=7860, # Puerto local | |
| share=True # Crea un enlace público | |
| ) | |