# EthanCastro's picture
# Update app.py
# fd7a705 verified
import gradio as gr
from transformers import AutoModelForImageTextToText, AutoProcessor
from peft import PeftModel
import torch
# --- CONFIGURATION ---
# 4-bit (bitsandbytes) quantized Qwen3-VL 2B base checkpoint from Unsloth.
BASE_MODEL = "unsloth/Qwen3-VL-2B-Instruct-unsloth-bnb-4bit"
# LoRA adapter fine-tuned for QuickDraw sketch -> tldraw JSON conversion.
LORA_ID = "EthanCastro/qwen3-vl-2b-quickdraw"

print("Loading model and processor...")
model = AutoModelForImageTextToText.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,  # NOTE(review): recent transformers prefers `dtype=`; `torch_dtype` still accepted
    device_map="auto",  # let accelerate place weights (GPU if available, else CPU)
    trust_remote_code=True
)
# Attach the LoRA adapter weights on top of the quantized base model.
model = PeftModel.from_pretrained(model, LORA_ID)
# Processor (tokenizer + image preprocessing) comes from the upstream Qwen repo,
# not the Unsloth quantized repo.
processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-2B-Instruct", trust_remote_code=True)
print("Model Ready!")
def respond(message, image, history):
    """Generate one assistant reply for the current user turn.

    Parameters
    ----------
    message : str
        The user's text prompt for this turn.
    image : PIL.Image.Image | None
        Optional sketch uploaded alongside the message.
    history : list[dict]
        Prior turns in Gradio "messages" format, e.g.
        [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello"}].
        Entries whose "content" is a multimodal list are reduced to their text part.

    Returns
    -------
    str
        The decoded model response (newly generated tokens only).
    """
    messages = []

    # 1. Convert history to Qwen's multimodal chat format.
    for msg in history:
        content = msg["content"]
        # If content is a list (multimodal), keep only the text for the history buffer.
        if isinstance(content, list):
            text_content = next((item["text"] for item in content if item["type"] == "text"), "")
        else:
            text_content = content
        messages.append({
            "role": msg["role"],
            "content": [{"type": "text", "text": text_content}],
        })

    # 2. Add the current user turn, including the new image when present.
    user_content = []
    if image is not None:
        user_content.append({"type": "image", "image": image})
    user_content.append({"type": "text", "text": message})
    messages.append({"role": "user", "content": user_content})

    # 3. Tokenize and generate.
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    if image is not None:
        inputs = processor(text=[text], images=[image], return_tensors="pt")
    else:
        inputs = processor(text=[text], return_tensors="pt")
    # FIX: move inputs to wherever device_map="auto" placed the model instead of
    # hard-coding "cuda" — the original crashed on CPU-only Spaces.
    inputs = inputs.to(model.device)

    with torch.no_grad():
        # FIX: temperature is ignored unless sampling is enabled; the original
        # passed temperature=0.3 with the default greedy decoding.
        outputs = model.generate(
            **inputs,
            max_new_tokens=1500,
            do_sample=True,
            temperature=0.3,
        )

    # FIX: decode only the tokens generated after the prompt, rather than
    # splitting the full transcript on the literal word "assistant" (fragile
    # whenever that word appears in the prompt or the response itself).
    new_tokens = outputs[:, inputs["input_ids"].shape[1]:]
    response = processor.batch_decode(new_tokens, skip_special_tokens=True)[0].strip()
    return response
# --- GRADIO INTERFACE ---
# Note: 'theme' removed from here per Gradio 6 migration guide
with gr.Blocks() as demo:
    gr.Markdown("# 🎨 QuickDraw → tldraw JSON")
    # Chatbot using default "messages" format (no type argument needed).
    # NOTE(review): this assumes a Gradio version where role/content dicts are
    # the default history format; on older releases the default is tuple pairs,
    # which would break the dict entries appended in chat_wrapper — confirm the
    # Space's pinned gradio version.
    chatbot = gr.Chatbot(height=500)
    with gr.Row():
        img_input = gr.Image(type="pil", label="Upload Sketch", scale=1)
        with gr.Column(scale=3):
            txt_input = gr.Textbox(
                show_label=False,
                placeholder="Convert this sketch to tldraw JSON format...",
                container=False
            )
            submit_btn = gr.Button("Send", variant="primary")

    def chat_wrapper(message, image, history):
        # Run one chat turn and return (cleared_text, cleared_image, new_history)
        # so the event wiring below resets both inputs after each send.
        # 1. Get response
        bot_res = respond(message, image, history)
        # 2. Update history using DICTIONARIES
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": bot_res})
        return "", None, history

    # The Chatbot component itself carries the history (no separate gr.State).
    submit_btn.click(chat_wrapper, [txt_input, img_input, chatbot], [txt_input, img_input, chatbot])
    txt_input.submit(chat_wrapper, [txt_input, img_input, chatbot], [txt_input, img_input, chatbot])

# Theme is now applied here in launch()
# NOTE(review): confirm launch() accepts `theme` on the installed Gradio
# version — on older releases the theme is passed to gr.Blocks() instead.
# Disable SSR to help prevent 503 errors on resource-constrained Spaces
demo.launch(theme=gr.themes.Soft(), ssr_mode=False)