import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
# Your adapter (LoRA fine-tuned model on Hugging Face)
ADAPTER_ID = "Anabury/My_Finetuned_Phi-4"

# Detect device
USE_GPU = torch.cuda.is_available()

# Pick base model depending on device
if USE_GPU:
    BASE_MODEL = "unsloth/phi-4-unsloth-bnb-4bit"  # fast + 4-bit quantized
else:
    BASE_MODEL = "unsloth/phi-4"  # full precision for CPU

print(f"Loading base model: {BASE_MODEL} on {'GPU' if USE_GPU else 'CPU'}")

# Load tokenizer for the chosen base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

# Load base model: fp16 + automatic device placement on GPU, fp32 on CPU.
base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto" if USE_GPU else None,
    torch_dtype=torch.float16 if USE_GPU else torch.float32,
    trust_remote_code=True,
)

# Attach the LoRA adapter weights on top of the base model.
model = PeftModel.from_pretrained(base, ADAPTER_ID)
model.eval()  # inference mode (disables dropout etc.)
# Chat function
def chat(message, history):
    """Generate a reply to *message* and append the (message, reply) pair.

    Args:
        message: Raw user text (no chat template applied yet).
        history: List of (user, bot) tuples backing the Gradio Chatbot.

    Returns:
        The updated history twice, because the Gradio wiring routes both
        outputs into the same Chatbot component.
    """
    # Simple prompt for now — swap in tokenizer.apply_chat_template later.
    inputs = tokenizer(message, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
    # generate() returns prompt + completion; decode only the newly
    # generated tokens, otherwise the reply echoes the user's prompt.
    prompt_len = inputs["input_ids"].shape[1]
    reply = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    history.append((message, reply))
    return history, history
# Gradio UI
# Gradio UI: a minimal chat layout wired to chat() above.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Phi-4 Chatbot (Fine-tuned)")
    chatbot = gr.Chatbot(height=420)
    msg = gr.Textbox(placeholder="Ask me anything…")
    clear = gr.Button("Clear")
    # Enter in the textbox runs chat(); both outputs feed the same chatbot.
    msg.submit(chat, [msg, chatbot], [chatbot, chatbot])
    # Clear resets the chatbot state to an empty history (no queueing needed).
    clear.click(lambda: [], None, chatbot, queue=False)

demo.launch()