Spaces:

idkWhatToUse
/

Vision-Language-Chatbot

Running

App Files Files Community

Vision-Language-Chatbot / app.py

idkWhatToUse

Upload app.py

691bb52 verified 8 months ago

Raw

History Blame Contribute Delete

3.49 kB

	import gradio as gr
	from PIL import Image
	from transformers import BlipProcessor, BlipForQuestionAnswering

	# Selectable models: display name -> checkpoint id passed to from_pretrained
	AVAILABLE_MODELS = {
	    "BLIP VQA Base": "Salesforce/blip-vqa-base",
	    "BLIP VQA Large (CapFilt)": "Salesforce/blip-vqa-capfilt-large",
	}

	# Default model: the first entry of AVAILABLE_MODELS, loaded when the module runs
	current_model_name = next(iter(AVAILABLE_MODELS))
	processor = BlipProcessor.from_pretrained(AVAILABLE_MODELS[current_model_name])
	model = BlipForQuestionAnswering.from_pretrained(AVAILABLE_MODELS[current_model_name])


	# Model switching
	def change_model(model_choice):
	    """Make the model selected in the UI the active one.

	    The module-level ``processor``, ``model`` and ``current_model_name`` are
	    replaced together, and only after both loads succeed, so a failed load
	    leaves the previously active model fully in place.

	    Args:
	        model_choice: Display name of the requested model; expected to be a
	            key of ``AVAILABLE_MODELS``.

	    Returns:
	        A status string for display: a confirmation naming ``model_choice``,
	        or a warning when ``model_choice`` is not a known model (in which
	        case nothing is changed).
	    """
	    global processor, model, current_model_name

	    # A cleared or stale selection should not surface as a bare KeyError.
	    if model_choice not in AVAILABLE_MODELS:
	        return f"⚠️ Unknown model: {model_choice}. Still using: {current_model_name}"

	    # Re-selecting the active model needs no reload.
	    if model_choice != current_model_name:
	        model_id = AVAILABLE_MODELS[model_choice]
	        # Load into locals first: if either call raises, the globals still
	        # describe the model that is actually in memory.
	        new_processor = BlipProcessor.from_pretrained(model_id)
	        new_model = BlipForQuestionAnswering.from_pretrained(model_id)
	        processor, model = new_processor, new_model
	        current_model_name = model_choice

	    return f"✅ Switched to: {model_choice}"


	# Question-answering logic
	def answer_question(history, image, question):
	    """Answer a question about the uploaded image and append the turn to the chat.

	    Args:
	        history: Existing chat turns as ``(user_message, bot_message)`` pairs.
	            ``None`` or an empty value is treated as an empty history.
	        image: The uploaded image, or ``None`` when nothing has been uploaded.
	        question: The question text. ``None`` or whitespace-only text is
	            rejected with a prompt instead of being sent to the model.

	    Returns:
	        A new list consisting of the prior turns plus one appended pair. For
	        invalid input the pair is ``(None, <prompt>)`` so the prompt appears
	        as a bot message rather than as something the user typed. Otherwise
	        the pair is ``(question, reply)`` where ``reply`` names the active
	        model and carries the decoded answer. ``history`` is not mutated.
	    """
	    # The chat component may hand over None before any turn exists.
	    turns = history or []

	    if image is None:
	        return turns + [(None, "Please upload an image first.")]
	    # Guard against None as well as blank text; None has no .strip().
	    if question is None or not question.strip():
	        return turns + [(None, "Please enter a question.")]

	    inputs = processor(image, question, return_tensors="pt")
	    out = model.generate(**inputs, max_new_tokens=50)
	    answer = processor.decode(out[0], skip_special_tokens=True)
	    reply = f"🤖({current_model_name}) Answer: {answer}"
	    return turns + [(question, reply)]


	# Reset the chat when a new image is uploaded
	def reset_chat(_):
	    """Return a new empty chat history; the argument is ignored."""
	    cleared_history = []
	    return cleared_history


	# Build the Gradio interface
	def build_ui():
	    """Assemble the Gradio Blocks app and return it without launching it.

	    Returns:
	        The ``gr.Blocks`` instance holding the model dropdown, the image and
	        question inputs, the chat display, the example inputs, and the event
	        bindings that connect them to the handler functions.
	    """
	    model_names = list(AVAILABLE_MODELS)
	    example_rows = [
	        ["sample_images/app.jpg", "How many apples are in the picture?"],
	        ["sample_images/cat_dog.jpg", "What animals are in the image?"],
	        ["sample_images/city.jpg", "What is the man doing?"],
	    ]

	    with gr.Blocks(title="Vision-Language Chatbot") as demo:
	        gr.Markdown("## 🤖 Vision-Language Chatbot")
	        gr.Markdown("Upload an image and ask multiple questions about it!")

	        # Model selection
	        model_dropdown = gr.Dropdown(
	            choices=model_names,
	            value=current_model_name,
	            label="Select Model",
	        )
	        status_md = gr.Markdown(f"✅ Current model: {current_model_name}")

	        # Layout
	        # NOTE(review): the original indentation was lost, so the nesting
	        # below is reconstructed from the blank-line grouping. Confirm that
	        # the chat display belongs below the row rather than inside the
	        # right-hand column.
	        with gr.Row():
	            with gr.Column(scale=1):
	                image_box = gr.Image(type="pil", label="Upload Image")

	            with gr.Column(scale=2):
	                question_box = gr.Textbox(
	                    placeholder="Ask something about the image...",
	                    label="Question",
	                )
	                ask_button = gr.Button("Ask", variant="primary")
	                clear_button = gr.Button("Clear Chat")

	        chat_display = gr.Chatbot(height=400, label="Chat History")

	        # Event wiring
	        ask_button.click(
	            fn=answer_question,
	            inputs=[chat_display, image_box, question_box],
	            outputs=chat_display,
	        )

	        # Both the clear button and an image change empty the chat display.
	        clear_button.click(fn=lambda: [], outputs=chat_display)
	        image_box.change(fn=reset_chat, inputs=image_box, outputs=chat_display)

	        model_dropdown.change(
	            fn=change_model,
	            inputs=model_dropdown,
	            outputs=status_md,
	        )

	        # Examples
	        gr.Examples(
	            examples=example_rows,
	            inputs=[image_box, question_box],
	            label="🏞️ Example Inputs",
	        )

	    return demo


	if __name__ == "__main__":
	    # Build the interface, then start serving it.
	    app = build_ui()
	    app.launch()