# vision_chat/app.py
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# ✅ Use a model that works on CPU
model_id = "microsoft/phi-2"
# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.float32, device_map="auto"
)
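# Note: device_map="auto" relies on the `accelerate` package being installed;
# it places the model on CPU automatically when no GPU is available.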
# Create generation pipeline
generator = pipeline(
    "text-generation", model=model, tokenizer=tokenizer, do_sample=True, temperature=0.7
)
# ✅ ChatInterface passes the running history to the function and expects the
# assistant's reply back as a plain string
def chat_fn(message, history):
    # Rebuild the full prompt from the history. With type="messages", each
    # turn is a dict with "role" ("user" or "assistant") and "content" keys.
    prompt = ""
    for turn in history:
        tag = "<|user|>" if turn["role"] == "user" else "<|assistant|>"
        prompt += f"{tag}\n{turn['content']}\n"
    prompt += f"<|user|>\n{message}\n<|assistant|>\n"
    # return_full_text=False makes the pipeline return only the newly
    # generated tokens, so the prompt never has to be stripped back out.
    output = generator(prompt, max_new_tokens=256, return_full_text=False)
    reply = output[0]["generated_text"].strip()
    return reply
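# For reference, a history list in type="messages" format looks roughly like:
#   [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}]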
# ✅ Gradio app: pass type="messages" explicitly so the history arrives as a
# list of role/content dicts, matching chat_fn above
chatbot_ui = gr.ChatInterface(
    fn=chat_fn,
    type="messages",
    title="Phi-2 Chatbot",
    theme="default",
)
if __name__ == "__main__":
    chatbot_ui.launch()
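# Minimal client-side sketch (assumes the `gradio_client` package is installed
# and the app is running locally on Gradio's default port):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   print(client.predict("Hello, who are you?", api_name="/chat"))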