import gradio as gr
from transformers import AutoModelForImageTextToText, AutoProcessor
from peft import PeftModel
import torch

# --- CONFIGURATION ---
BASE_MODEL = "unsloth/Qwen3-VL-2B-Instruct-unsloth-bnb-4bit"
LORA_ID = "EthanCastro/qwen3-vl-2b-quickdraw"

print("Loading model and processor...")

# Load the 4-bit quantized base model, then attach the fine-tuned LoRA adapter
model = AutoModelForImageTextToText.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(model, LORA_ID)

# The processor (tokenizer + image preprocessor) comes from the original Qwen repo
processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-2B-Instruct", trust_remote_code=True)
print("Model Ready!")
def respond(message, image, history):
    # History arrives in Gradio's "messages" format: a list of dictionaries,
    # e.g. [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello"}]
    messages = []

    # 1. Convert history to Qwen's multimodal format
    for msg in history:
        # Ensure content is treated as plain text in the history buffer
        content = msg["content"]
        # If content is a list (multimodal), extract just the text for simplicity
        if isinstance(content, list):
            text_content = next((item["text"] for item in content if item["type"] == "text"), "")
        else:
            text_content = content
        messages.append({
            "role": msg["role"],
            "content": [{"type": "text", "text": text_content}],
        })
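    # For illustration: a history entry {"role": "user", "content": "draw a cat"}
    # becomes {"role": "user", "content": [{"type": "text", "text": "draw a cat"}]}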
    # 2. Add the current user turn, including the new image if one was uploaded
    user_content = []
    if image is not None:
        user_content.append({"type": "image", "image": image})
    user_content.append({"type": "text", "text": message})
    messages.append({"role": "user", "content": user_content})
    # 3. Tokenize and generate
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    if image is not None:
        inputs = processor(text=[text], images=[image], return_tensors="pt").to(model.device)
    else:
        inputs = processor(text=[text], return_tensors="pt").to(model.device)
    with torch.no_grad():
        # do_sample=True is required for temperature to have any effect;
        # without it, generate() falls back to greedy decoding
        outputs = model.generate(**inputs, max_new_tokens=1500, do_sample=True, temperature=0.3)
    # Decode only the newly generated tokens; slicing off the prompt is more
    # robust than splitting the full decode on the literal string "assistant"
    prompt_len = inputs["input_ids"].shape[1]
    response = processor.batch_decode(outputs[:, prompt_len:], skip_special_tokens=True)[0].strip()
    return response
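
# Hypothetical smoke test (not part of the app) for calling respond() directly,
# assuming a local file "sketch.png" exists:
#   from PIL import Image
#   print(respond("Convert this sketch to tldraw JSON format...", Image.open("sketch.png"), []))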
# --- GRADIO INTERFACE ---
# Note: 'theme' removed from here per the Gradio 6 migration guide
with gr.Blocks() as demo:
    gr.Markdown("# 🎨 QuickDraw → tldraw JSON")

    # Chatbot using the default "messages" format (no type argument needed)
    chatbot = gr.Chatbot(height=500)

    with gr.Row():
        img_input = gr.Image(type="pil", label="Upload Sketch", scale=1)
        with gr.Column(scale=3):
            txt_input = gr.Textbox(
                show_label=False,
                placeholder="Convert this sketch to tldraw JSON format...",
                container=False,
            )
            submit_btn = gr.Button("Send", variant="primary")
    def chat_wrapper(message, image, history):
        # 1. Get the model's response
        bot_res = respond(message, image, history)
        # 2. Append the turn to history as dictionaries (only the text is shown
        #    in the chat; the uploaded image is not stored in the visible history)
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": bot_res})
        # Clear the textbox and image input, and return the updated history
        return "", None, history
    # The chatbot component itself stores the history, so no separate gr.State is needed
    submit_btn.click(chat_wrapper, [txt_input, img_input, chatbot], [txt_input, img_input, chatbot])
    txt_input.submit(chat_wrapper, [txt_input, img_input, chatbot], [txt_input, img_input, chatbot])
# Theme is now applied here in launch()
# Disable SSR to help prevent 503 errors on resource-constrained Spaces
demo.launch(theme=gr.themes.Soft(), ssr_mode=False)
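
# To run this outside Spaces (a minimal sketch, assuming a CUDA GPU with enough
# VRAM for the 4-bit base model plus the LoRA adapter):
#   pip install gradio transformers peft accelerate bitsandbytes torch
#   python app.py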