# chatphi / app.py
# arudradey's picture — Update app.py
# commit 5f18925 (verified)
import gradio as gr
import torch
import time
import psutil
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
# Model served by this Space; small enough to run CPU-only.
MODEL_ID = "microsoft/Phi-4-mini-instruct"

print(f"Loading {MODEL_ID}...")

# Load tokenizer and weights once at import time, pinned to CPU.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="cpu",        # CPU-only Space: no GPU available
    torch_dtype="auto",      # let HF pick the checkpoint's native dtype
    trust_remote_code=True   # NOTE(review): runs code shipped in the model repo — trusted source assumed
)
def get_ram():
    """Return currently available system RAM as a human-readable GB string."""
    free_gib = psutil.virtual_memory().available / (1024 ** 3)
    return f"{free_gib:.2f} GB"
def generate_reply(history, system_prompt, temp, top_p, max_tokens, rep_penalty):
    """Stream a chat completion from the model, token by token.

    Args:
        history: list of {"role": ..., "content": ...} message dicts.
        system_prompt: optional system message prepended to the conversation.
        temp: sampling temperature; 0 means greedy decoding.
        top_p: nucleus-sampling cutoff.
        max_tokens: maximum number of new tokens to generate.
        rep_penalty: repetition penalty factor.

    Yields:
        (partial_text, stats) tuples as text arrives, where stats is a
        Markdown string with an approximate tok/s rate and free RAM.
    """
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history)

    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to("cpu")

    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True
    )

    do_sample = float(temp) > 0
    generation_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=int(max_tokens),
        do_sample=do_sample,
        repetition_penalty=float(rep_penalty),
        # Avoid the per-call "pad_token_id not set" warning.
        pad_token_id=tokenizer.eos_token_id,
    )
    # Only pass sampling knobs when actually sampling: transformers rejects
    # temperature == 0 and warns about sampling args in greedy mode.
    if do_sample:
        generation_kwargs["temperature"] = float(temp)
        generation_kwargs["top_p"] = float(top_p)

    # generate() blocks, so run it on a worker thread and consume the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    output = ""
    start = time.time()
    chunks = 0  # streamer chunks ≈ tokens, so tok/s is approximate
    for new_text in streamer:
        output += new_text
        chunks += 1
        elapsed = time.time() - start
        tps = chunks / elapsed if elapsed > 0 else 0
        stats = f"⚡ {tps:.2f} tok/s | RAM: {get_ram()}"
        yield output, stats

    # Streamer is exhausted once generation finishes; reap the worker.
    thread.join()
# --- UI layout and event wiring -----------------------------------------
with gr.Blocks(title="Phi-4 Mini Chat", fill_height=True) as demo:
    with gr.Sidebar():
        system_prompt = gr.Textbox(
            value="You are a helpful AI assistant.",
            label="System Prompt",
            lines=3
        )
        # Generation hyper-parameters exposed to the user.
        temp = gr.Slider(0.0, 2.0, 0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(0.0, 1.0, 0.9, step=0.05, label="Top-p")
        rep_penalty = gr.Slider(1.0, 2.0, 1.1, step=0.05, label="Repetition Penalty")
        max_tokens = gr.Slider(64, 1024, 256, step=64, label="Max Tokens")
        stats_box = gr.Markdown("Ready")  # live tok/s + free-RAM readout

    gr.Markdown("# 🤖 Phi-4 Mini")
    # Tuple-style history (Gradio 3.x compatible): list of (user, bot) pairs.
    chatbot = gr.Chatbot(height=350)
    with gr.Row():
        user_input = gr.Textbox(placeholder="Type message...", scale=4)
        send_btn = gr.Button("Send", scale=1)

    def user_fn(msg, history):
        # Append the user's message with a pending bot slot; clear the textbox.
        history = history or []
        history.append((msg, None))
        return "", history

    def bot_fn(history, system_prompt, t, p, mt, rp):
        # Convert tuple-style history to role/content messages for the model,
        # then stream the reply back into the last history slot.
        user_msg = history[-1][0]
        msg_history = []
        for u, b in history[:-1]:
            msg_history.append({"role": "user", "content": u})
            if b:
                msg_history.append({"role": "assistant", "content": b})
        # BUGFIX: include the message being answered — previously the current
        # user turn was dropped, so the model replied without the question.
        msg_history.append({"role": "user", "content": user_msg})

        generator = generate_reply(msg_history, system_prompt, t, p, mt, rp)
        history[-1] = (user_msg, "")
        for text, stats in generator:
            history[-1] = (user_msg, text)
            yield history, stats

    # Enter key and Send button behave identically: stash the user message,
    # then stream the bot reply and stats.
    user_input.submit(user_fn, [user_input, chatbot], [user_input, chatbot]).then(
        bot_fn,
        [chatbot, system_prompt, temp, top_p, max_tokens, rep_penalty],
        [chatbot, stats_box]
    )
    send_btn.click(user_fn, [user_input, chatbot], [user_input, chatbot]).then(
        bot_fn,
        [chatbot, system_prompt, temp, top_p, max_tokens, rep_penalty],
        [chatbot, stats_box]
    )
# Launch the Gradio server only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()