# Hugging Face Space: Llama3 PEFT/LoRA insurance fine-tune served via Gradio.
import os

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Hugging Face access token for the private/gated model repo.
# os.environ[...] (not .get) is deliberate: if HUGGINGFACE_HUB_TOKEN is unset
# we fail fast at import time with a KeyError, before any download is attempted.
hf_token = os.environ["HUGGINGFACE_HUB_TOKEN"]
# ⚙️ Model loading (swap .to("cpu") for .to("cuda") if a GPU is available).
# Repo id hoisted to a constant so model and tokenizer cannot drift apart.
MODEL_ID = "hedtorresca/llama3-peftLora-insurance-finetuned"


def cargar_modelo():
    """Load the fine-tuned causal LM and its tokenizer onto the CPU.

    Returns:
        tuple: ``(model, tokenizer)`` ready for text generation.
    """
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, token=hf_token).to("cpu")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=hf_token)
    return model, tokenizer


# Loaded once at import time so every request reuses the same instances.
model, tokenizer = cargar_modelo()
# 🔁 Inference function
def preguntar_al_modelo(instruction, max_new_tokens=100):
    """Generate an answer for *instruction* using an Alpaca-style prompt.

    Args:
        instruction: The user's question.
        max_new_tokens: Cap on generated tokens. Gradio sliders may deliver
            this as a float, so it is coerced to ``int`` before ``generate()``.

    Returns:
        str: The decoded sequence. Note it includes the prompt text itself,
        since the full output tensor is decoded.
    """
    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=int(max_new_tokens),  # generate() requires an int
        do_sample=True,  # sampling: output is non-deterministic by design
    )
    respuesta = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return respuesta
# 🚀 Gradio interface, exposed as the Space's API surface.
# All user-facing strings are intentionally kept in Spanish.
interface = gr.Interface(
    fn=preguntar_al_modelo,
    inputs=[
        gr.Textbox(
            label="Pregunta",
            placeholder="¿Cuál es el impacto de la IA en los seguros?",
        ),
        gr.Slider(20, 400, value=100, step=10, label="Max tokens generados"),
    ],
    outputs="text",
    title="Asistente IA - Seguro 2030",
    description="Modelo finetuneado con el documento de McKinsey sobre el futuro del seguro",
)
# NOTE(review): no interface.launch() is visible in this chunk — confirm it is
# called later in the file, otherwise the app will never start serving.