import gradio as gr
from PIL import Image
from transformers import BlipProcessor, BlipForQuestionAnswering
# Selectable models: dropdown label -> checkpoint id passed to `from_pretrained`.
AVAILABLE_MODELS = {
    "BLIP VQA Base": "Salesforce/blip-vqa-base",
    "BLIP VQA Large (CapFilt)": "Salesforce/blip-vqa-capfilt-large",
}

# Default model: the first entry of AVAILABLE_MODELS. These three
# module-level names are the ones `change_model` rebinds on a switch.
current_model_name = next(iter(AVAILABLE_MODELS))
processor = BlipProcessor.from_pretrained(AVAILABLE_MODELS[current_model_name])
model = BlipForQuestionAnswering.from_pretrained(AVAILABLE_MODELS[current_model_name])
# Model switching
def change_model(model_choice):
    """Load the checkpoint for *model_choice* and make it the active model.

    Args:
        model_choice: A key of ``AVAILABLE_MODELS`` (the dropdown label).

    Returns:
        A status string for the UI: a confirmation on success, or a
        message naming the choice when it is not in ``AVAILABLE_MODELS``.
    """
    global processor, model, current_model_name

    model_id = AVAILABLE_MODELS.get(model_choice)
    if model_id is None:
        # Leave the active model untouched rather than recording a name
        # that has no checkpoint behind it.
        return f"❌ Unknown model: {model_choice}"

    if model_choice == current_model_name:
        # Already active; reloading the same checkpoint would only cost time.
        return f"✅ Switched to: {model_choice}"

    # Load into locals first: if either load raises, the globals still
    # describe the previously working processor/model/name triple.
    new_processor = BlipProcessor.from_pretrained(model_id)
    new_model = BlipForQuestionAnswering.from_pretrained(model_id)

    processor, model, current_model_name = new_processor, new_model, model_choice
    return f"✅ Switched to: {model_choice}"
# Question-answering logic
def answer_question(history, image, question):
    """Answer *question* about *image* and append the exchange to the chat.

    Args:
        history: Existing chat history as a list of ``(user, bot)`` pairs;
            ``None`` is treated as an empty history.
        image: The uploaded image, or ``None`` when nothing is uploaded.
        question: The user's question text; ``None`` or whitespace-only
            text is treated as missing.

    Returns:
        A new history list with one pair appended. *history* itself is
        not modified.
    """
    past_turns = list(history) if history else []

    # Validation prompts go in the bot slot of the pair so they are shown
    # as replies rather than as something the user typed.
    if image is None:
        return past_turns + [(None, "Please upload an image first.")]
    if question is None or not question.strip():
        return past_turns + [(None, "Please enter a question.")]

    inputs = processor(image, question, return_tensors="pt")
    out = model.generate(**inputs, max_new_tokens=50)
    answer = processor.decode(out[0], skip_special_tokens=True)
    reply = f"🤖({current_model_name}) Answer: {answer}"
    return past_turns + [(question, reply)]
# Reset the chat when a new image is uploaded
def reset_chat(_):
    """Return a fresh, empty chat history; the argument is ignored."""
    return list()
# Build the Gradio interface
def build_ui():
    """Build the Gradio Blocks interface for the chatbot and return it.

    The interface has a model dropdown with a status line, an image
    upload, a question box with "Ask" and "Clear Chat" buttons, a chat
    history panel, and a set of example inputs. The returned object is
    not launched here.
    """
    with gr.Blocks(title="Vision-Language Chatbot") as demo:
        gr.Markdown("## 🤖 Vision-Language Chatbot")
        gr.Markdown("Upload an image and ask multiple questions about it!")
        # Model selection
        model_selector = gr.Dropdown(
            choices=list(AVAILABLE_MODELS.keys()),
            value=current_model_name,
            label="Select Model",
        )
        model_status = gr.Markdown(f"✅ Current model: {current_model_name}")
        # Layout
        # NOTE(review): the source's indentation was lost, so placing the
        # buttons and the chatbot inside the right-hand column is an
        # assumption — confirm against the intended layout.
        with gr.Row():
            with gr.Column(scale=1):
                image_input = gr.Image(type="pil", label="Upload Image")
            with gr.Column(scale=2):
                question_input = gr.Textbox(
                    placeholder="Ask something about the image...",
                    label="Question",
                )
                ask_btn = gr.Button("Ask", variant="primary")
                clear_btn = gr.Button("Clear Chat")
                chatbot = gr.Chatbot(height=400, label="Chat History")
        # Event wiring
        # "Ask" sends the current history, image and question to
        # answer_question and shows the returned history in the chatbot.
        ask_btn.click(
            fn=answer_question,
            inputs=[chatbot, image_input, question_input],
            outputs=chatbot,
        )
        # "Clear Chat" replaces the chat history with an empty list.
        clear_btn.click(fn=lambda: [], outputs=chatbot)
        # Any change of the image input also empties the chat history.
        image_input.change(fn=reset_chat, inputs=image_input, outputs=chatbot)
        # Picking a model calls change_model and shows its status message.
        model_selector.change(
            fn=change_model,
            inputs=model_selector,
            outputs=model_status,
        )
        # Examples: each row fills the image input and the question box.
        gr.Examples(
            examples=[
                ["sample_images/app.jpg", "How many apples are in the picture?"],
                ["sample_images/cat_dog.jpg", "What animals are in the image?"],
                ["sample_images/city.jpg", "What is the man doing?"]
            ],
            inputs=[image_input, question_input],
            label="🏞️ Example Inputs",
        )
    return demo
if __name__ == "__main__":
    # Build the interface and start serving it when run as a script.
    demo = build_ui()
    demo.launch()