# vision_chat/app.py
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# ✅ Use a model that works on CPU
model_id = "microsoft/phi-2"
# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.float32, device_map="auto"
)
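# Note: device_map="auto" relies on the `accelerate` package being installed;
# it places the model on CPU automatically when no GPU is available.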
# Create generation pipeline
generator = pipeline(
    "text-generation", model=model, tokenizer=tokenizer, do_sample=True, temperature=0.7
)
# ✅ ChatInterface passes the running history to the function and expects the
# assistant's reply back as a plain string
def chat_fn(message, history):
    # Rebuild the full prompt from the history. With type="messages", each
    # turn is a dict with "role" ("user" or "assistant") and "content" keys.
    prompt = ""
    for turn in history:
        tag = "<|user|>" if turn["role"] == "user" else "<|assistant|>"
        prompt += f"{tag}\n{turn['content']}\n"
    prompt += f"<|user|>\n{message}\n<|assistant|>\n"
    # return_full_text=False makes the pipeline return only the newly
    # generated tokens, so the prompt never has to be stripped back out.
    output = generator(prompt, max_new_tokens=256, return_full_text=False)
    reply = output[0]["generated_text"].strip()
    return reply
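# For reference, a history list in type="messages" format looks roughly like:
#   [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}]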
# ✅ Gradio app: pass type="messages" explicitly so the history arrives as a
# list of role/content dicts, matching chat_fn above
chatbot_ui = gr.ChatInterface(
    fn=chat_fn,
    type="messages",
    title="Phi-2 Chatbot",
    theme="default",
)
if __name__ == "__main__":
    chatbot_ui.launch()
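# Minimal client-side sketch (assumes the `gradio_client` package is installed
# and the app is running locally on Gradio's default port):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   print(client.predict("Hello, who are you?", api_name="/chat"))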