import gradio as gr
import torch
import time
import psutil
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
# Hugging Face Hub identifier of the chat model served by this app.
MODEL_ID = "microsoft/Phi-4-mini-instruct"

# The tokenizer and the model are created once, at import time, and are then
# shared as module-level globals by the functions below.
print(f"Loading {MODEL_ID}...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# NOTE(review): trust_remote_code=True permits Python code shipped with the
# model repository to be executed locally -- confirm it is still required.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cpu",
)
def get_ram():
    """Return the available system memory as text, e.g. ``"3.21 GB"``.

    The figure is ``psutil.virtual_memory().available`` divided by 1024**3
    and rendered with two decimal places.
    """
    bytes_per_unit = 1024**3
    free_bytes = psutil.virtual_memory().available
    return f"{free_bytes / bytes_per_unit:.2f} GB"
def generate_reply(history, system_prompt, temp, top_p, max_tokens, rep_penalty):
    """Stream a model reply for a chat conversation.

    Args:
        history: Chat messages as ``{"role": ..., "content": ...}`` dicts,
            oldest first. ``None`` or an empty list is treated as no history.
        system_prompt: Text of the system message; skipped when empty.
        temp: Sampling temperature. A value of 0 or less selects greedy
            decoding, in which case ``temp`` and ``top_p`` are not forwarded.
        top_p: Nucleus-sampling threshold, used only when sampling.
        max_tokens: Upper bound on newly generated tokens.
        rep_penalty: Repetition penalty forwarded to ``model.generate``.

    Yields:
        ``(text_so_far, stats)`` tuples: the reply accumulated so far and a
        status line with the streaming rate and the available RAM.

    Raises:
        Exception: Any error raised by ``model.generate`` in the worker
            thread is re-raised here once the stream has been drained.
    """
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history or [])

    # return_dict=True also yields the attention mask, so generate() does not
    # have to guess which positions are padding.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=int(max_tokens),
        repetition_penalty=float(rep_penalty),
    )
    temperature = float(temp)
    if temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=temperature,
            top_p=float(top_p),
        )
    else:
        # Greedy decoding: sampling-only parameters (in particular a
        # temperature of 0) must not be passed alongside do_sample=False.
        generation_kwargs["do_sample"] = False

    failures = []

    def _run_generation():
        # Runs in the worker thread. If generation fails, the streamer would
        # never receive its end-of-stream marker and the consuming loop below
        # would block forever, so close the stream explicitly.
        try:
            model.generate(**generation_kwargs)
        except Exception as exc:
            failures.append(exc)
            streamer.end()

    thread = Thread(target=_run_generation)
    thread.start()

    output = ""
    # Monotonic clock: unaffected by system clock adjustments.
    start = time.perf_counter()
    # Counts streamed text chunks, which is what the "tok/s" figure reports.
    # NOTE(review): the streamer may emit more than one token per chunk, so
    # treat the rate as an approximation.
    tokens = 0
    for new_text in streamer:
        output += new_text
        tokens += 1
        elapsed = time.perf_counter() - start
        tps = tokens / elapsed if elapsed > 0 else 0
        stats = f"⚡ {tps:.2f} tok/s | RAM: {get_ram()}"
        yield output, stats

    thread.join()
    if failures:
        raise failures[0]
# Chat UI: a sidebar with the generation settings, the chat transcript, and
# an input row. Sending a message first records it in the transcript
# (user_fn) and then streams the model's reply into it (bot_fn).
with gr.Blocks(title="Phi-4 Mini Chat", fill_height=True) as demo:
    with gr.Sidebar():
        system_prompt = gr.Textbox(
            value="You are a helpful AI assistant.",
            label="System Prompt",
            lines=3,
        )
        temp = gr.Slider(0.0, 2.0, 0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(0.0, 1.0, 0.9, step=0.05, label="Top-p")
        rep_penalty = gr.Slider(1.0, 2.0, 1.1, step=0.05, label="Repetition Penalty")
        max_tokens = gr.Slider(64, 1024, 256, step=64, label="Max Tokens")
        stats_box = gr.Markdown("Ready")

    gr.Markdown("# 🤖 Phi-4 Mini")

    # The chatbot is created without an explicit history format; the
    # callbacks below treat the history as (user, assistant) pairs.
    # NOTE(review): confirm the installed Gradio version still uses the pair
    # format by default.
    chatbot = gr.Chatbot(height=350)

    with gr.Row():
        user_input = gr.Textbox(placeholder="Type message...", scale=4)
        send_btn = gr.Button("Send", scale=1)

    def user_fn(msg, history):
        """Append the user's message as a pending turn and clear the textbox.

        Returns:
            ``("", history)``: the new (empty) textbox value and the history
            extended with ``(msg, None)``.
        """
        history = history or []
        history.append((msg, None))
        return "", history

    def bot_fn(history, system_prompt, t, p, mt, rp):
        """Stream the assistant's reply into the last (pending) history turn.

        Args:
            history: (user, assistant) pairs; the last pair is the turn that
                is being answered.
            system_prompt: System prompt text forwarded to ``generate_reply``.
            t: Temperature.
            p: Top-p.
            mt: Maximum number of new tokens.
            rp: Repetition penalty.

        Yields:
            ``(history, stats)`` with the last pair's reply growing as text
            is streamed.
        """
        user_msg = history[-1][0]

        # Convert the completed turns to role/content messages for the model.
        msg_history = []
        for user_text, bot_text in history[:-1]:
            msg_history.append({"role": "user", "content": user_text})
            if bot_text:
                msg_history.append({"role": "assistant", "content": bot_text})
        # The pending message must be part of the prompt; otherwise the model
        # never sees the message it is supposed to answer.
        msg_history.append({"role": "user", "content": user_msg})

        history[-1] = (user_msg, "")
        for text, stats in generate_reply(msg_history, system_prompt, t, p, mt, rp):
            history[-1] = (user_msg, text)
            yield history, stats

    # Pressing Enter in the textbox and clicking "Send" run the same pipeline.
    bot_inputs = [chatbot, system_prompt, temp, top_p, max_tokens, rep_penalty]
    for trigger in (user_input.submit, send_btn.click):
        trigger(user_fn, [user_input, chatbot], [user_input, chatbot]).then(
            bot_fn,
            bot_inputs,
            [chatbot, stats_box],
        )
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()