Spaces:
Running
Running
| # app.py - Microsoft Fara-7B Multi-Modal Demo | |
| import gradio as gr | |
| from transformers import AutoProcessor, AutoModelForVision2Seq | |
| import torch | |
| from PIL import Image | |
| import requests | |
| from io import BytesIO | |
# Load the processor and model once, at import time. The original author
# notes that the first load takes roughly 5-10 minutes.
MODEL_NAME = "microsoft/Fara-7B"

print("正在加载模型,请稍候...")

# NOTE(review): trust_remote_code=True executes Python code shipped with the
# model repository; keep MODEL_NAME pinned to a source you trust.
processor = AutoProcessor.from_pretrained(MODEL_NAME, trust_remote_code=True)

# Loading options gathered in one place so they are easy to audit or tweak.
_model_load_options = {
    "trust_remote_code": True,
    "torch_dtype": torch.float16,
    "device_map": "auto",
}
model = AutoModelForVision2Seq.from_pretrained(MODEL_NAME, **_model_load_options)
def chat_with_image(image: "Image.Image", question: str, max_new_tokens: int = 200) -> str:
    """Answer a free-form question about an image using the loaded model.

    Args:
        image: The uploaded picture, or ``None`` when nothing was uploaded.
        question: The user's question. ``None``, empty, or whitespace-only
            input is treated as "no question".
        max_new_tokens: Upper bound on the number of generated tokens. It is
            coerced to ``int`` because a UI slider may deliver a float.

    Returns:
        The generated answer, or a user-facing message when an input is
        missing or processing fails. Failures inside the generation path are
        logged and reported as text rather than raised, since this function
        is the UI callback boundary.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    if image is None:
        return "请上传一张图片。"
    if not question or not question.strip():
        return "请输入问题。"

    try:
        # Single-turn conversation: one image placeholder plus the question.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": question},
                ],
            }
        ]

        # Render the conversation to the prompt string the model expects.
        prompt = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Build model inputs and move them to the model's device.
        inputs = processor(
            text=prompt,
            images=image,
            return_tensors="pt",
        ).to(model.device)

        # Greedy decoding; no gradients are needed for inference.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=int(max_new_tokens),
                do_sample=False,
                pad_token_id=processor.tokenizer.pad_token_id,
                eos_token_id=processor.tokenizer.eos_token_id,
            )

        # The returned sequence starts with the prompt tokens. Decode only
        # what comes after them, instead of decoding everything and cutting
        # on a template-specific marker string that may not be present.
        prompt_length = inputs["input_ids"].shape[-1]
        generated_tokens = outputs[0][prompt_length:]
        return processor.decode(generated_tokens, skip_special_tokens=True).strip()
    except Exception as e:
        # UI boundary: keep the traceback in the server log and show the
        # user a short message instead of crashing the callback.
        logging.getLogger(__name__).exception("chat_with_image failed")
        return f"处理出错: {e}"
# Gradio UI: inputs (image, question, length slider, submit) in the left
# column, the model's answer in the right column, examples underneath.
_EXAMPLE_IMAGE_URL = "https://tse2-mm.cn.bing.net/th/id/OIP-C.OkY4eWXcSyyit75R53WOBQAAAA?w=330&h=174&c=7&r=0&o=7&cb=ucfimg2&pid=1.7&rm=3&ucfimg=1"
_EXAMPLE_QUESTIONS = (
    "What animal is on the candy?",
    "Describe the scene in detail.",
)

with gr.Blocks(title="Fara-7B 多模态问答") as demo:
    gr.Markdown(value="# 🖼️ Microsoft Fara-7B 图像问答系统\n上传图片并提问,AI 将为你解答!")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="上传图片", type="pil")
            question_input = gr.Textbox(
                label="你的问题",
                placeholder="例如:图中有什么动物?",
            )
            max_tokens = gr.Slider(
                minimum=50,
                maximum=500,
                value=200,
                step=10,
                label="最大生成长度",
            )
            submit_btn = gr.Button(value="提交")
        with gr.Column():
            output = gr.Textbox(label="模型回答", lines=5)

    # Wire the button to the inference callback.
    submit_btn.click(
        fn=chat_with_image,
        inputs=[image_input, question_input, max_tokens],
        outputs=output,
    )

    # Each example pairs the same sample image with a different question.
    gr.Examples(
        examples=[[_EXAMPLE_IMAGE_URL, prompt_text] for prompt_text in _EXAMPLE_QUESTIONS],
        inputs=[image_input, question_input],
    )

demo.launch()