# Qwen3-VL chat demo — Hugging Face Space running on ZeroGPU.
import gradio as gr
from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
import torch
from PIL import Image
import io
import base64
import spaces
# Load model and processor once at module import so they are shared across
# all requests (the @spaces.GPU handler below borrows the GPU per call).
model = Qwen3VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen3-VL-2B-Instruct",
    torch_dtype=torch.bfloat16,  # half-precision weights to reduce GPU memory
    device_map="auto"  # let accelerate place weights on the available device(s)
)
processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-2B-Instruct")
def process_image(image):
    """Return *image* as a base64 PNG data URI, passing strings through.

    Strings are assumed to already be URLs/data URIs and are returned
    unchanged; PIL images are serialized to PNG and wrapped in a
    ``data:image/png;base64,...`` URI; anything else is returned as-is.
    """
    if isinstance(image, str):
        return image
    if not isinstance(image, Image.Image):
        return image
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    encoded = base64.b64encode(buffer.getvalue()).decode()
    return f"data:image/png;base64,{encoded}"
@spaces.GPU(duration=120)
def qwen_chat(message, image, chat_history):
    """
    Process one chat turn with an optional image input.

    Args:
        message (str): User's text message (may be empty when only an
            image is supplied).
        image: Optional PIL image input.
        chat_history (list): Prior turns in Gradio "messages" format —
            dicts with "role" and "content" keys (the Chatbot component
            is created with type="messages").

    Returns:
        tuple: (updated chat history in "messages" format, "" to clear
        the message textbox)
    """
    # Nothing to do for a completely empty turn.
    if not message and image is None:
        return chat_history, ""

    # Rebuild the conversation for the model. The Chatbot is configured
    # with type="messages", so each history entry is a role/content dict
    # (NOT a (user, assistant) tuple — iterating tuples here would raise).
    messages = []
    for turn in chat_history:
        messages.append({
            "role": turn["role"],
            "content": [{"type": "text", "text": turn["content"]}],
        })

    # Current user turn: image (if any) first, then the text.
    current_content = []
    if image is not None:
        current_content.append({"type": "image", "image": image})
    if message:
        current_content.append({"type": "text", "text": message})
    messages.append({"role": "user", "content": current_content})

    # Tokenize the templated conversation and move it to the model device.
    inputs = processor.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    )
    inputs = inputs.to(model.device)

    # Inference only — no gradients needed.
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=256)

    # Strip the prompt tokens so only the newly generated reply is decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]

    # Append this turn in "messages" format so the Chatbot can render it.
    # Copy first so the caller's list is not mutated in place.
    chat_history = list(chat_history)
    chat_history.append({
        "role": "user",
        "content": message if message else "[Image provided]",
    })
    chat_history.append({"role": "assistant", "content": output_text})
    return chat_history, ""
# Create Gradio interface: chat column + optional image upload, with a
# text box, send button, and clear button wired to the model handler.
with gr.Blocks(title="Qwen3-VL Chat") as demo:
    gr.Markdown(
        """
# 🎨 Qwen3-VL Chat
Chat with Qwen3-VL-2B-Instruct - A multimodal AI that can understand both text and images!
[Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
"""
    )
    with gr.Row():
        with gr.Column(scale=3):
            # NOTE(review): type="messages" means the handler must receive
            # and return role/content dicts — verify qwen_chat's history
            # format matches this contract.
            chatbot = gr.Chatbot(
                label="Chat History",
                type="messages",
                height=600,
                show_copy_button=True
            )
        with gr.Column(scale=1):
            image_input = gr.Image(
                label="Upload Image (Optional)",
                type="pil",  # handler receives a PIL.Image, not a filepath
                sources=["upload", "clipboard"],
                interactive=True
            )
    with gr.Row():
        message_input = gr.Textbox(
            label="Message",
            placeholder="Type your message here...",
            lines=2,
            scale=4
        )
        send_btn = gr.Button("Send", scale=1, variant="primary")
    with gr.Row():
        clear_btn = gr.Button("Clear Chat", variant="secondary")
    gr.Markdown(
        """
### Tips:
- Upload an image to ask questions about it
- Describe what you see or ask for analysis
- The model can answer questions about images and text
"""
    )
    # Event handlers

    def send_message(msg, img, history):
        # Thin wrapper so both the button click and textbox submit share
        # one entry point into the model handler.
        return qwen_chat(msg, img, history)

    send_btn.click(
        send_message,
        inputs=[message_input, image_input, chatbot],
        outputs=[chatbot, message_input]
    )
    # Pressing Enter in the textbox behaves the same as clicking Send.
    message_input.submit(
        send_message,
        inputs=[message_input, image_input, chatbot],
        outputs=[chatbot, message_input]
    )
    # Reset history, image, and message box in one shot.
    clear_btn.click(
        lambda: ([], None, ""),
        outputs=[chatbot, image_input, message_input]
    )
if __name__ == "__main__":
    # Launch locally; removed stray trailing "|" artifact that broke syntax.
    # Set share=True to expose a public Gradio link.
    demo.launch(share=False)