Spaces:

EthanCastro
/

quickdraw-tldraw-app

Sleeping

File size: 3,830 Bytes

af3303a
 
 
 
 
 
401720b
f9d56ed
af3303a
 
 
 
 
 
 
 
 
 
 
 
 
 
f9d56ed
 
af3303a
f9d56ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af3303a
f9d56ed
af3303a
 
 
 
 
 
f9d56ed
af3303a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f9d56ed
 
af3303a
 
f9d56ed
 
af3303a
 
 
 
 
 
 
 
 
 
 
 
f9d56ed
af3303a
f9d56ed
 
 
 
 
af3303a
 
f9d56ed
af3303a
 
 
f9d56ed
fd7a705

import gradio as gr
from transformers import AutoModelForImageTextToText, AutoProcessor
from peft import PeftModel
import torch

# --- CONFIGURATION ---
# Hub repo id of the base checkpoint (the name suggests a bitsandbytes 4-bit build).
BASE_MODEL = "unsloth/Qwen3-VL-2B-Instruct-unsloth-bnb-4bit"
# Hub repo id of the adapter that PeftModel attaches on top of BASE_MODEL.
LORA_ID = "EthanCastro/qwen3-vl-2b-quickdraw"

print("Loading model and processor...")

# The base weights are loaded first and then wrapped by PEFT, so `model`
# always refers to the adapter-augmented model used by the rest of the file.
model = PeftModel.from_pretrained(
    AutoModelForImageTextToText.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    ),
    LORA_ID,
)

# NOTE: the processor comes from the upstream Qwen repo rather than BASE_MODEL.
processor = AutoProcessor.from_pretrained(
    "Qwen/Qwen3-VL-2B-Instruct",
    trust_remote_code=True,
)
print("Model Ready!")

def respond(message, image, history):
    """Generate the model's reply to the current user turn.

    Args:
        message: Text of the current user turn.
        image: Image attached to the current turn, or ``None`` for a
            text-only turn (the UI in this file supplies a PIL image).
        history: Earlier turns as a list of
            ``{"role": ..., "content": ...}`` dictionaries. ``content`` may be
            a plain string or a list of typed parts; only the first text part
            of a list is carried over. ``None`` is treated as an empty history.

    Returns:
        The newly generated text with surrounding whitespace stripped. The
        prompt and earlier turns are never included in the returned string.
    """
    messages = []

    # 1. Convert history to the multimodal chat format (text parts only).
    for msg in history or []:
        content = msg["content"]
        if isinstance(content, list):
            # Tolerate parts that are not dicts or that lack "type"/"text"
            # (e.g. file/image parts) instead of raising KeyError.
            text_content = next(
                (
                    item.get("text", "")
                    for item in content
                    if isinstance(item, dict) and item.get("type") == "text"
                ),
                "",
            )
        else:
            text_content = content

        messages.append({
            "role": msg["role"],
            "content": [{"type": "text", "text": text_content}],
        })

    # 2. Add the current user turn, with the new image first when present.
    user_content = []
    if image is not None:
        user_content.append({"type": "image", "image": image})
    user_content.append({"type": "text", "text": message})
    messages.append({"role": "user", "content": user_content})

    # 3. Render the prompt and tokenize it.
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    processor_kwargs = {"text": [text], "return_tensors": "pt"}
    if image is not None:
        processor_kwargs["images"] = [image]

    # The model is placed with device_map="auto", so follow its device rather
    # than assuming CUDA is available.
    inputs = processor(**processor_kwargs).to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=1500, temperature=0.3)

    # generate() returns prompt + completion. Drop the prompt tokens before
    # decoding so the reply is exact even when it contains the word
    # "assistant" (splitting the decoded text on that word would truncate it).
    prompt_length = inputs["input_ids"].shape[1]
    new_tokens = outputs[:, prompt_length:]
    response = processor.batch_decode(new_tokens, skip_special_tokens=True)[0]

    return response.strip()

# --- GRADIO INTERFACE ---
# `theme` is not passed to gr.Blocks() (Gradio 6 migration guide); it is
# supplied to launch() at the bottom of the file instead.
with gr.Blocks() as demo:
    gr.Markdown("# 🎨 QuickDraw → tldraw JSON")

    # Conversation view; no `type` argument is given, so the default
    # "messages" (role/content dictionaries) format is used.
    chatbot = gr.Chatbot(height=500)

    with gr.Row():
        img_input = gr.Image(type="pil", label="Upload Sketch", scale=1)
        with gr.Column(scale=3):
            txt_input = gr.Textbox(
                show_label=False,
                placeholder="Convert this sketch to tldraw JSON format...",
                container=False,
            )
            submit_btn = gr.Button("Send", variant="primary")

    def chat_wrapper(message, image, history):
        """Run one chat turn and return the updated UI values.

        Calls ``respond`` with the current text, image and chat history, then
        appends the user message and the model reply to ``history`` in place.

        Returns:
            A 3-tuple ``("", None, history)``: an empty string for the textbox,
            ``None`` for the image input, and the extended history for the
            chatbot.
        """
        reply = respond(message, image, history)

        # Both turns are stored as role/content dictionaries.
        history.extend(
            (
                {"role": "user", "content": message},
                {"role": "assistant", "content": reply},
            )
        )

        return "", None, history

    # The Send button and pressing Enter in the textbox trigger the same handler
    # with the same inputs and outputs.
    for trigger in (submit_btn.click, txt_input.submit):
        trigger(
            chat_wrapper,
            [txt_input, img_input, chatbot],
            [txt_input, img_input, chatbot],
        )

# The theme is applied at launch time.
# SSR is disabled to help prevent 503 errors on resource-constrained Spaces.
demo.launch(
    theme=gr.themes.Soft(),
    ssr_mode=False,
)