# Hugging Face Space app (page header noted status: "Runtime error")
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
from peft import PeftModel

# Base checkpoint and the LoRA adapter fine-tuned on top of it.
BASE = "ybelkada/falcon-7b-sharded-bf16"
ADAPTER = "gopi30/phase-1-sft-legal-alligned"

# 8-bit quantization so the 7B model fits on a single modest GPU.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained(BASE, use_fast=False)
# Falcon's tokenizer ships without a pad token; generation pipelines warn or
# fail when padding is needed, so reuse EOS as the pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

base = AutoModelForCausalLM.from_pretrained(
    BASE,
    quantization_config=bnb_config,
    device_map="auto",        # let accelerate place the sharded weights
    trust_remote_code=True,   # Falcon uses custom modeling code
)

# Attach the LoRA adapter on top of the frozen, quantized base model.
model = PeftModel.from_pretrained(base, ADAPTER)
model.eval()

# FIX: the model was already dispatched with device_map="auto" above.
# Passing device_map again to pipeline() for an already-instantiated,
# accelerate-dispatched model triggers a re-dispatch error at runtime —
# a likely cause of this Space's "Runtime error". Build the pipeline on
# the existing model without any device argument.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
def chat(message, history):
    """Generate one assistant reply for gr.ChatInterface.

    Parameters
    ----------
    message : str
        The new user message.
    history : list[tuple[str, str]]
        Previous (user, assistant) turns, as supplied by gr.ChatInterface
        in its tuple-based history format.

    Returns
    -------
    str
        The model's reply with the echoed prompt stripped off.
    """
    # Rebuild the whole conversation as a plain-text transcript prompt.
    turns = [f"User: {user_msg}\nAssistant: {bot_msg}\n" for user_msg, bot_msg in history]
    prompt = "".join(turns) + f"User: {message}\nAssistant:"

    # FIX: temperature/top_p are ignored by generate() unless sampling is
    # enabled, so the original call effectively ran greedy decoding;
    # do_sample=True makes those settings take effect.
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )

    # "generated_text" echoes the full prompt; keep only the text after the
    # final "Assistant:" marker, i.e. the newly generated reply.
    return outputs[0]["generated_text"].split("Assistant:")[-1].strip()
# Wire the chat function into a minimal Gradio chat UI and serve it.
with gr.Blocks() as demo:
    gr.ChatInterface(chat, title="Falcon + LoRA Chatbot")

demo.launch()