# Spaces: Sleeping
# (Hugging Face Space status text captured with the source; kept as a comment.)
import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Read the Hugging Face access token from the Space's secrets
# (secret name: 'reparfinal').
huggingface_token = os.environ.get('reparfinal')
if huggingface_token is None:
    raise ValueError("El token de Hugging Face no está configurado en los secretos del Space")

# Authenticate so the gated Llama 2 weights can be downloaded.
login(huggingface_token)

# Model configuration: the 7B chat-tuned variant of Llama 2.
model_id = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,   # half-ish precision to fit a 7B model in memory
    device_map="auto",            # place layers on whatever devices are available
)
def respond_to_query(user_input: str) -> str:
    """Generate a chat answer for *user_input* with Llama 2.

    Wraps the question in Llama 2's ``[INST] ... [/INST]`` chat template,
    samples a completion, and returns only the newly generated text.

    Args:
        user_input: The user's question, as plain text.

    Returns:
        The model's decoded answer (special tokens removed, prompt stripped).
    """
    # Llama 2 chat models expect the [INST] ... [/INST] instruction format.
    prompt = f"[INST] {user_input} [/INST]"
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        inputs,
        max_new_tokens=256,
        do_sample=True,
        top_p=0.95,
        top_k=50,
        temperature=0.7,
        repetition_penalty=1.1,
    )
    # generate() returns the prompt ids followed by the new tokens for
    # decoder-only models; slice off the prompt so the UI does not echo
    # "[INST] ... [/INST]" back to the user.
    new_tokens = outputs[0][inputs.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
# Gradio UI: one textbox in (the question), one textbox out (the answer).
interface = gr.Interface(
    fn=respond_to_query,
    inputs=gr.Textbox(label="Tu pregunta"),
    outputs=gr.Textbox(label="Respuesta"),
    title="Chatbot con Llama-2-7b",
    description="Haz una pregunta y el modelo te responderá",
)

if __name__ == "__main__":
    # Launch the web server only when run as a script (Spaces entry point).
    interface.launch()