Qwen / app.py
ManarAli's picture
Update app.py
2ee66d7 verified
from transformers import AutoProcessor, AutoModelForVision2Seq
from qwen_vl_utils import process_vision_info
import gradio as gr
from PIL import Image
import torch
# Load the Qwen2.5-VL 32B Instruct checkpoint and its matching processor.
# Both are created from the same identifier so they cannot drift apart.
_CHECKPOINT = "Qwen/Qwen2.5-VL-32B-Instruct"

_model_kwargs = {
    "dtype": torch.float16,
    "device_map": "auto",
}
model2 = AutoModelForVision2Seq.from_pretrained(_CHECKPOINT, **_model_kwargs)
processor = AutoProcessor.from_pretrained(_CHECKPOINT)
# Task instructions (in German) that play_game puts in front of the user's
# description. The text asks which of three colour grids is being described.
GAME_RULES = (
    "In diesem Bild sehen Sie drei Farbraster. In der folgenden Äußerung "
    "beschreibt der Sprecher genau eines der Gitter.\n"
    "Bitte geben Sie mir an, ob er sich auf das\n"
    "linke, mittlere oder rechte Farbraster bezieht.\n"
)
# Dropdown label -> image file path for the nine example images
# ("Bild 1" -> "example1.jpg", ..., "Bild 9" -> "example9.jpg").
# Nothing is opened here; the files are read where the mapping is used.
IMAGE_OPTIONS = {
    f"Bild {number}": f"example{number}.jpg" for number in range(1, 10)
}
def play_game(selected_image_label, user_prompt):
    """Ask the model which grid in the selected image a description refers to.

    Args:
        selected_image_label: Key of ``IMAGE_OPTIONS`` naming the image that is
            sent to the model.
        user_prompt: The description typed by the user. ``None`` or an empty
            string is sent as an empty description after ``GAME_RULES``.

    Returns:
        The decoded model reply (newly generated tokens only) as a string.

    Raises:
        KeyError: If ``selected_image_label`` is not a key of ``IMAGE_OPTIONS``.
    """
    image_path = IMAGE_OPTIONS[selected_image_label]
    # Image.open is lazy and keeps the file open. copy() forces the pixel data
    # to be read into an independent image so the handle can be released here
    # instead of leaking one per request.
    with Image.open(image_path) as image_file:
        selected_image = image_file.copy()

    # Single-turn chat message: the image followed by rules + user description.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": selected_image},
                {"type": "text", "text": GAME_RULES + "\n" + (user_prompt or "")},
            ],
        }
    ]

    # Render the chat template as text and collect the vision inputs with
    # Qwen's helper, then let the processor build the model tensors.
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    ).to(model2.device)

    with torch.inference_mode():
        generated_ids = model2.generate(**inputs, max_new_tokens=512)

    # generate() returns prompt + completion; drop the prompt prefix of each
    # sequence so only the newly generated tokens are decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]
    return output_text
# Gradio App
def update_image(selected_label):
    """Return the image registered under ``selected_label`` for the preview.

    Args:
        selected_label: Key of ``IMAGE_OPTIONS``.

    Returns:
        A fully loaded PIL image that no longer depends on the source file.

    Raises:
        KeyError: If ``selected_label`` is not a key of ``IMAGE_OPTIONS``.
    """
    selected_path = IMAGE_OPTIONS[selected_label]
    # Image.open is lazy and keeps the file open; copy() reads the pixels so
    # the handle is closed on leaving the with-block instead of leaking on
    # every dropdown change.
    with Image.open(selected_path) as image_file:
        return image_file.copy()


# Label that is pre-selected in the dropdown and shown in the preview at start.
_INITIAL_LABEL = "Bild 2"

with gr.Blocks() as demo:
    with gr.Column():
        image_selector = gr.Dropdown(
            choices=list(IMAGE_OPTIONS.keys()),
            value=_INITIAL_LABEL,
            label="Wählen Sie ein Bild",
        )
        image_display = gr.Image(
            value=update_image(_INITIAL_LABEL),
            label="Bild",
            interactive=False,
            type="pil",
        )
        prompt_input = gr.Textbox(
            value="Beschreibung",
            label="Ihre Beschreibung",
        )
        output_text = gr.Textbox(label="Antwort des Modells")
        play_button = gr.Button("Spiel starten")

    # When user changes selection, update image
    image_selector.change(
        fn=update_image,
        inputs=[image_selector],
        outputs=image_display,
    )

    # When user clicks play, send inputs to model
    play_button.click(
        fn=play_game,
        inputs=[image_selector, prompt_input],
        outputs=output_text,
    )

demo.launch()