Spaces:

Zeyad-Alaa
/

UI-analysis

Runtime error

App Files Files Community

UI-analysis / app.py

Zeyad-Alaa

Create app.py

04c8445 verified about 1 month ago

raw

history blame contribute delete

11.1 kB

	import gradio as gr
	import torch
	import json
	import re
	import spaces
	from PIL import Image
	from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
	from peft import PeftModel

	# ── Config ───────────────────────────────────────────────────────────────────
	# Identifier of the base vision-language checkpoint the adapter is attached to.
	BASE_MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"
	# Location handed to PeftModel.from_pretrained; adapter_config.json and
	# adapter_model.safetensors are expected next to this file.
	ADAPTER_PATH = "./"

	# ── Processor: created once at import time and reused by every request ──────
	processor = AutoProcessor.from_pretrained(
	    BASE_MODEL_ID,
	    trust_remote_code=True,
	)

	# ── Base model in half precision, then the LoRA adapter on top ──────────────
	# NOTE(review): device_map="auto" is resolved at import time, before any
	# @spaces.GPU call runs — confirm this placement is what the hosting
	# environment expects.
	base_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
	    BASE_MODEL_ID,
	    trust_remote_code=True,
	    device_map="auto",
	    torch_dtype=torch.float16,
	)

	# Module.eval() returns the module itself, so the adapter-wrapped model can be
	# switched to inference mode in the same expression.
	model = PeftModel.from_pretrained(base_model, ADAPTER_PATH).eval()

	# ── System prompt ─────────────────────────────────────────────────────────────
	# Sent as the "system" message of every request built in analyze_ui. It asks the
	# model for one JSON object with the keys ui_elements, layout_structure,
	# hierarchy, style and summary — the same keys analyze_ui reads when it renders
	# the Markdown report — and asks for no Markdown fences or extra commentary.
	# The text is runtime behavior: change it only together with the report
	# rendering in analyze_ui.
	SYSTEM_PROMPT = """You are an expert UI/UX design analyst. When given a UI screenshot, analyze it thoroughly and return a structured JSON report with exactly these keys:

	{
	"ui_elements": [
	{"type": "element type", "label": "text/label if any", "position_hint": "top-left / center / etc."}
	],
	"layout_structure": "description of overall layout pattern (e.g. sidebar + main, top-nav + grid)",
	"hierarchy": {
	"primary": ["most prominent / CTA elements"],
	"secondary": ["supporting elements"],
	"tertiary": ["decorative or minor elements"]
	},
	"style": {
	"color_palette": ["dominant colors as hex or descriptive names"],
	"typography": "font style observations",
	"spacing": "tight / balanced / airy",
	"visual_theme": "overall aesthetic feel"
	},
	"summary": "one paragraph plain-English summary of the UI"
	}

	Respond ONLY with valid JSON. No markdown fences, no extra commentary."""


	# ── Inference ─────────────────────────────────────────────────────────────────
	def _extract_json(raw_text: str):
	    """Return the JSON value found in *raw_text*, or ``None`` if none parses.

	    Markdown code fences are stripped first. When the remaining text is not
	    valid JSON as a whole, the span from the first ``{`` to the last ``}`` is
	    tried as well, so commentary around the object does not discard it.
	    """
	    cleaned = re.sub(r"^```(?:json)?\s*", "", raw_text, flags=re.MULTILINE)
	    cleaned = re.sub(r"\s*```$", "", cleaned, flags=re.MULTILINE).strip()

	    candidates = [cleaned]
	    start, end = cleaned.find("{"), cleaned.rfind("}")
	    if 0 <= start < end:
	        candidates.append(cleaned[start:end + 1])

	    for candidate in candidates:
	        try:
	            return json.loads(candidate)
	        except json.JSONDecodeError:
	            continue
	    return None


	def _as_text(value) -> str:
	    """Render a JSON value as inline text, comma-joining the items of a list."""
	    if isinstance(value, list):
	        return ", ".join(str(item) for item in value)
	    return str(value)


	def _append_section(lines: list, heading: str, value, format_key, *, skip_empty: bool) -> None:
	    """Append *heading* and one bullet per entry of *value* to *lines*.

	    A non-dict *value* (the model does not always follow the schema) is shown
	    as a single bullet instead of raising on ``.items()``.
	    """
	    if not value:
	        return
	    lines.append(heading)
	    if isinstance(value, dict):
	        for key, item in value.items():
	            if skip_empty and not item:
	                continue
	            lines.append(f"- {format_key(key)}: {_as_text(item)}")
	    else:
	        lines.append(f"- {_as_text(value)}")
	    lines.append("")


	def _render_report(parsed: dict) -> str:
	    """Build the Markdown report from the parsed JSON object."""
	    lines = ["## 📊 UI Analysis Report\n"]

	    # Elements: tolerate a single element or a bare string instead of a list.
	    elements = parsed.get("ui_elements") or []
	    if not isinstance(elements, list):
	        elements = [elements]
	    if elements:
	        lines.append("### 🧩 UI Elements\n")
	        for el in elements:
	            if not isinstance(el, dict):
	                lines.append(f"- {el}")
	                continue
	            etype = el.get("type", "element")
	            label = el.get("label") or el.get("text") or ""
	            pos = el.get("position_hint") or el.get("position") or ""
	            bullet = f"- {etype}"
	            if label:
	                bullet += f": {label}"
	            if pos:
	                bullet += f" — {pos}"
	            lines.append(bullet)
	        lines.append("")

	    # Layout
	    layout = parsed.get("layout_structure", "")
	    if layout:
	        lines.append(f"### 🏗️ Layout\n{layout}\n")

	    # Hierarchy: empty levels are skipped.
	    _append_section(
	        lines,
	        "### 📐 Hierarchy",
	        parsed.get("hierarchy"),
	        str.capitalize,
	        skip_empty=True,
	    )

	    # Style: every key is listed, "color_palette" becomes "Color Palette".
	    _append_section(
	        lines,
	        "### 🎨 Style",
	        parsed.get("style"),
	        lambda key: key.replace("_", " ").title(),
	        skip_empty=False,
	    )

	    # Summary
	    summary = parsed.get("summary", "")
	    if summary:
	        lines.append(f"### 💬 Summary\n{summary}\n")

	    return "\n".join(lines)


	@spaces.GPU
	def analyze_ui(image: Image.Image, extra_prompt: str) -> tuple[str, str]:
	    """Analyze a UI screenshot with the adapter-wrapped model.

	    Args:
	        image: Screenshot to analyze; ``None`` when nothing was uploaded.
	        extra_prompt: Optional user instruction. When empty or whitespace,
	            a default "analyze in full detail" request is used.

	    Returns:
	        ``(report, json_text)``: a Markdown report and a pretty-printed JSON
	        string. If the model output contains no JSON object, the report is
	        the raw model text and the JSON is ``{"raw_output": <raw text>}``.
	    """
	    if image is None:
	        return "⚠️ Please upload a UI screenshot first.", "{}"

	    user_text = (
	        (extra_prompt or "").strip()
	        or "Analyze this UI design screenshot in full detail."
	    )

	    messages = [
	        {"role": "system", "content": SYSTEM_PROMPT},
	        {
	            "role": "user",
	            "content": [
	                {"type": "image", "image": image},
	                {"type": "text", "text": user_text},
	            ],
	        },
	    ]

	    text_input = processor.apply_chat_template(
	        messages, tokenize=False, add_generation_prompt=True
	    )

	    inputs = processor(
	        text=[text_input],
	        images=[image],
	        return_tensors="pt",
	        padding=True,
	    )
	    inputs = {k: v.to(model.device) for k, v in inputs.items()}

	    with torch.no_grad():
	        # Greedy decoding. temperature/top_p are passed as None explicitly —
	        # presumably to override sampling defaults from the model's
	        # generation config; confirm before removing.
	        output_ids = model.generate(
	            **inputs,
	            max_new_tokens=1024,
	            do_sample=False,
	            temperature=None,
	            top_p=None,
	            repetition_penalty=1.1,
	        )

	    # generate() returns prompt + continuation; keep only the new tokens.
	    input_len = inputs["input_ids"].shape[1]
	    generated = output_ids[:, input_len:]
	    raw_text = processor.batch_decode(generated, skip_special_tokens=True)[0].strip()

	    # ── Parse JSON ────────────────────────────────────────────────────────────
	    parsed = _extract_json(raw_text)
	    if not isinstance(parsed, dict):
	        # Unparseable output, or valid JSON that is not an object: show the
	        # raw text rather than failing on dict-only access.
	        json_text = json.dumps({"raw_output": raw_text}, indent=2, ensure_ascii=False)
	        return f"## 📊 UI Analysis\n\n{raw_text}", json_text

	    return _render_report(parsed), json.dumps(parsed, indent=2, ensure_ascii=False)


	# ── Gradio UI ─────────────────────────────────────────────────────────────────
	# Stylesheet passed to gr.Blocks(css=CSS) below. It defines a dark palette as
	# CSS custom properties on :root and imports the DM Mono, Syne and DM Sans
	# families from Google Fonts. The rules target the hooks used by the layout:
	# #header (the title HTML), .panel (both columns), .run-btn (the analyze
	# button), .markdown-body (the report pane) and .footer (the footer HTML).
	# NOTE(review): .tab-nav and the bare label/textarea/input selectors rely on
	# markup Gradio generates itself — presumably stable, verify after upgrades.
	CSS = """
	@import url('https://fonts.googleapis.com/css2?family=DM+Mono:wght@400;500&family=Syne:wght@700;800&family=DM+Sans:wght@300;400;500&display=swap');

	:root {
	--bg: #0c0c10;
	--surface: #14141a;
	--surface2: #1c1c26;
	--border: #2a2a38;
	--accent: #7c6aff;
	--accent2: #ff6a9b;
	--text: #e8e8f0;
	--muted: #6b6b80;
	--radius: 12px;
	}

	body, .gradio-container {
	background: var(--bg) !important;
	font-family: 'DM Sans', sans-serif;
	color: var(--text);
	}

	#header { text-align: center; padding: 2.5rem 1rem 1.5rem; }
	#header h1 {
	font-family: 'Syne', sans-serif; font-size: 2.6rem; font-weight: 800;
	background: linear-gradient(135deg, #7c6aff 0%, #ff6a9b 100%);
	-webkit-background-clip: text; -webkit-text-fill-color: transparent; margin: 0;
	}
	#header p { color: var(--muted); font-size: 1rem; margin: 0.5rem 0 0; }

	.panel {
	background: var(--surface) !important;
	border: 1px solid var(--border) !important;
	border-radius: var(--radius) !important;
	}

	label {
	font-family: 'DM Mono', monospace !important; font-size: 0.75rem !important;
	color: var(--muted) !important; letter-spacing: 0.05em; text-transform: uppercase;
	}

	textarea, input[type=text] {
	background: var(--surface2) !important; border: 1px solid var(--border) !important;
	color: var(--text) !important; border-radius: 8px !important;
	font-family: 'DM Sans', sans-serif !important;
	}

	.run-btn {
	background: linear-gradient(135deg, #7c6aff, #ff6a9b) !important;
	border: none !important; border-radius: 8px !important; color: white !important;
	font-family: 'Syne', sans-serif !important; font-weight: 700 !important;
	font-size: 1rem !important; padding: 0.75rem 2rem !important; cursor: pointer;
	}

	.tab-nav button {
	font-family: 'DM Mono', monospace !important; font-size: 0.8rem !important;
	color: var(--muted) !important; border-radius: 6px !important;
	}
	.tab-nav button.selected { color: var(--accent) !important; border-color: var(--accent) !important; }

	.markdown-body { font-family: 'DM Sans', sans-serif !important; color: var(--text) !important; }
	.markdown-body h2 { font-family: 'Syne', sans-serif; color: var(--accent); }
	.markdown-body h3 { font-family: 'Syne', sans-serif; color: var(--accent2); font-size: 1rem; }
	.markdown-body code {
	font-family: 'DM Mono', monospace; background: var(--surface2);
	border-radius: 4px; padding: 0.1em 0.4em;
	}

	.footer {
	text-align: center; color: var(--muted); font-family: 'DM Mono', monospace;
	font-size: 0.72rem; padding: 1.5rem; letter-spacing: 0.04em;
	}
	"""

	# Page layout: a title banner, a row with an input panel (screenshot, optional
	# prompt, run button) beside an output panel (Markdown report tab and raw JSON
	# tab), then a footer. The button feeds both inputs to analyze_ui and writes
	# its two return values to the two output components.
	with gr.Blocks(title="UI Design Analyzer · Qwen2.5-VL + LoRA", css=CSS) as demo:

	    # Title banner; styled through the #header rules in CSS.
	    gr.HTML("""
	<div id="header">
	<h1>UI Design Analyzer</h1>
	<p>Qwen2.5-VL-3B-Instruct + LoRA Adapter  ·  Drop any UI screenshot for instant deep analysis</p>
	</div>
	""")

	    with gr.Row():
	        # Input panel. type="pil" makes Gradio hand analyze_ui a PIL image.
	        with gr.Column(scale=1, elem_classes="panel"):
	            image_input = gr.Image(
	                label="Upload UI Screenshot",
	                type="pil",
	                height=340,
	            )
	            prompt_input = gr.Textbox(
	                label="Custom Prompt (optional)",
	                placeholder="Optional: focus the analysis — e.g. 'Focus on navigation and CTA hierarchy'",
	                lines=3,
	            )
	            run_btn = gr.Button("⚡ Analyze UI", elem_classes="run-btn")

	        # Output panel: the same analysis as a readable report and as JSON.
	        with gr.Column(scale=1, elem_classes="panel"):
	            with gr.Tabs():
	                with gr.Tab("📋 Readable Report"):
	                    report_out = gr.Markdown(
	                        elem_classes="markdown-body",
	                        value="Upload a screenshot and click Analyze UI to begin.",
	                    )
	                with gr.Tab("{ } Raw JSON"):
	                    json_out = gr.Code(label="Structured JSON Output", language="json")

	    # Output order matches analyze_ui's (report, json_text) return tuple.
	    run_btn.click(
	        fn=analyze_ui,
	        inputs=[image_input, prompt_input],
	        outputs=[report_out, json_out],
	    )

	    # Footer; styled through the .footer rules in CSS.
	    gr.HTML(
	        '<div class="footer">'
	        'BASE: Qwen/Qwen2.5-VL-3B-Instruct  ·  '
	        'ADAPTER: LoRA (adapter_model.safetensors)  ·  '
	        'HF ZeroGPU'
	        '</div>'
	    )

	# Start the app once the layout is fully defined.
	demo.launch()