# app.py — Falcon-7B (8-bit) + LoRA adapter Gradio chatbot.
# (Removed web-scrape residue that broke the file: "gopi30's picture",
#  "Update app.py", "b43201f verified" — Hugging Face page chrome, not code.)
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
from peft import PeftModel
BASE = "ybelkada/falcon-7b-sharded-bf16"
ADAPTER = "gopi30/phase-1-sft-legal-alligned"
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(BASE, use_fast=False)
base = AutoModelForCausalLM.from_pretrained(
BASE,
quantization_config=bnb_config,
device_map="auto",
trust_remote_code=True
)
model = PeftModel.from_pretrained(base, ADAPTER)
model.eval()
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
def chat(message, history):
prompt = ""
for user_msg, bot_msg in history:
prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
prompt += f"User: {message}\nAssistant:"
outputs = pipe(prompt, max_new_tokens=256, temperature=0.7, top_p=0.9)
reply = outputs[0]["generated_text"].split("Assistant:")[-1].strip()
return reply
with gr.Blocks() as demo:
gr.ChatInterface(chat, title="Falcon + LoRA Chatbot")
demo.launch()