# phi3-mini-chat / app.py
# AlexKitipov's picture
# Create app.py
# 15664cf verified
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Hub repository that both the tokenizer and the model weights are loaded from.
MODEL_ID = "AlexKitipov/Phi-3-mini-128k-instruct"

# System message placed at the start of every conversation.
SYSTEM_PROMPT = "You are a helpful AI assistant."

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Weights are loaded as bfloat16 when CUDA is available and float32 otherwise;
# device placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype=(
        torch.float32 if not torch.cuda.is_available() else torch.bfloat16
    ),
)
def _message_text(content):
    """Return the plain text carried by a history message's content, or ""."""
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        # NOTE(review): some Gradio versions appear to deliver content as a
        # list of typed parts such as {"type": "text", "text": ...} -- confirm
        # against the installed version.  Parts without string text (files,
        # components) are ignored because the prompt is text-only.
        return "\n".join(
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        )
    return ""


def build_prompt(history, user_message):
    """Build the text prompt for one chat turn.

    The conversation is assembled as a list of role/content messages: the
    system prompt, the prior turns from ``history``, then ``user_message`` as
    the final user turn.  It is rendered with the tokenizer's chat template
    when one is configured, otherwise as a plain ``ROLE: content`` transcript
    that ends with ``ASSISTANT:``.

    Args:
        history: Prior turns.  Two layouts are accepted: ``(user, assistant)``
            pairs, or ``{"role": ..., "content": ...}`` dicts (one per
            message).  ``None`` or an empty sequence means no prior turns.
        user_message: The new user message to answer.

    Returns:
        The prompt string to tokenize and feed to the model.
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]

    for turn in history or ():
        if isinstance(turn, dict):
            # Messages-style history.  Unpacking such a dict as a pair would
            # yield its *keys* ("role", "content") and silently inject them
            # as fake turns, so it is handled explicitly.
            role = turn.get("role")
            text = _message_text(turn.get("content"))
            if role in ("user", "assistant") and text:
                messages.append({"role": role, "content": text})
            continue

        # Pair-style history: one (user, assistant) entry per exchange;
        # either side may be empty/None and is then skipped.
        user, assistant = turn
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})

    messages.append({"role": "user", "content": user_message})

    # getattr guards tokenizers that expose apply_chat_template but have no
    # chat_template attribute at all.
    if hasattr(tokenizer, "apply_chat_template") and getattr(
        tokenizer, "chat_template", None
    ):
        return tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

    # Fallback formatting.  The system prompt is already the first entry of
    # `messages`, so it is rendered once as "SYSTEM: ..." rather than being
    # prepended a second time as a bare line.
    lines = [f"{m['role'].upper()}: {m['content']}" for m in messages]
    lines.append("ASSISTANT:")
    return "\n".join(lines)
def chat_fn(message, history):
    """Generate the assistant's reply to ``message`` given the chat ``history``.

    The prompt produced by ``build_prompt`` is tokenized, moved to the model's
    device and passed to ``model.generate`` with sampling enabled.  Only the
    tokens produced after the prompt are decoded (special tokens skipped) and
    returned as a string.
    """
    encoded = tokenizer(
        build_prompt(history, message), return_tensors="pt"
    ).to(model.device)

    # generate() returns prompt + continuation; remember where the prompt ends
    # so that only the continuation is decoded.
    prompt_length = encoded["input_ids"].shape[-1]

    sampling_options = {
        "max_new_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.9,
        "do_sample": True,
        "pad_token_id": tokenizer.eos_token_id,
    }

    with torch.no_grad():
        sequences = model.generate(**encoded, **sampling_options)

    reply_tokens = sequences[0][prompt_length:]
    return tokenizer.decode(reply_tokens, skip_special_tokens=True)
# Chat UI wired to chat_fn; created at import time under the name `demo`.
demo = gr.ChatInterface(
    chat_fn,
    description="Chat with the Phi-3-mini-128k-instruct model.",
    title="Phi-3-mini-128k Chat",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()