gouyongxiang commited on
Commit
5424f5c
·
verified ·
1 Parent(s): dd6ee55

create app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py - Microsoft Fara-7B multi-modal demo
# Imports grouped stdlib-first, then third-party, per PEP 8.
from io import BytesIO

import gradio as gr
import requests
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq

# Checkpoint to serve (first load takes roughly 5-10 minutes).
MODEL_NAME = "microsoft/Fara-7B"

print("正在加载模型,请稍候...")

# trust_remote_code: the checkpoint ships custom model/processor code.
# fp16 + device_map="auto": place half-precision weights on available devices.
processor = AutoProcessor.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForVision2Seq.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
22
def chat_with_image(image: Image.Image, question: str, max_new_tokens: int = 200) -> str:
    """Answer a free-form question about an uploaded image with Fara-7B.

    Args:
        image: User-supplied PIL image, or None when nothing was uploaded.
        question: Question text about the image.
        max_new_tokens: Upper bound on generated tokens. Gradio sliders may
            deliver a float, so the value is coerced to int before generation.

    Returns:
        The model's answer, or a Chinese help/error message when input is
        missing or inference fails.
    """
    # Guard clauses for missing input (user-facing Chinese messages).
    if image is None:
        return "请上传一张图片。"
    if not question.strip():
        return "请输入问题。"

    try:
        # One user turn: an image placeholder plus the text question, in the
        # chat-template format expected by the processor.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": question},
                ],
            }
        ]

        prompt = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Tokenize text + encode image, then move tensors to the model device.
        inputs = processor(
            text=prompt,
            images=image,
            return_tensors="pt",
        ).to(model.device)

        # Greedy decoding; no gradients needed for inference.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=int(max_new_tokens),
                do_sample=False,
                pad_token_id=processor.tokenizer.pad_token_id,
                eos_token_id=processor.tokenizer.eos_token_id,
            )

        # FIX: decode only the newly generated tokens. The original decoded
        # the full sequence (prompt + answer) and relied on a fragile
        # "Assistant:" string split to strip the prompt back out.
        prompt_len = inputs["input_ids"].shape[1]
        response = processor.decode(
            outputs[0][prompt_len:], skip_special_tokens=True
        ).strip()

        # Kept as a fallback in case the template still echoes a role tag.
        if "Assistant:" in response:
            response = response.split("Assistant:")[-1].strip()

        return response

    except Exception as e:
        # Demo-level boundary: report the error in the UI instead of crashing.
        return f"处理出错: {str(e)}"
70
+
71
# Gradio UI: image + question in, model answer out.
with gr.Blocks(title="Fara-7B 多模态问答") as demo:
    gr.Markdown("# 🖼️ Microsoft Fara-7B 图像问答系统\n上传图片并提问,AI 将为你解答!")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="上传图片")
            question_input = gr.Textbox(label="你的问题", placeholder="例如:图中有什么动物?")
            max_tokens = gr.Slider(50, 500, value=200, step=10, label="最大生成长度")
            submit_btn = gr.Button("提交")
        with gr.Column():
            output = gr.Textbox(label="模型回答", lines=5)

    # Route the button click through the inference function defined above.
    submit_btn.click(
        fn=chat_with_image,
        inputs=[image_input, question_input, max_tokens],
        outputs=output,
    )

    # Clickable examples; max_tokens is omitted so it keeps its slider value.
    # NOTE(review): examples are URL strings for a pil-typed Image component —
    # presumably Gradio resolves them; confirm with the installed version.
    gr.Examples(
        examples=[
            ["https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/candy.jpg", "What animal is on the candy?"],
            ["https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample.png", "Describe the scene in detail."],
        ],
        inputs=[image_input, question_input],
    )

# FIX: guard the launch so importing app.py (e.g. from a test or another
# entry point) does not start a server as a side effect. HF Spaces and
# `python app.py` both run this file as __main__, so behavior there is
# unchanged.
if __name__ == "__main__":
    demo.launch()