"""Gradio demo for Local UI Locator — standalone HF Space version.

Upload a Windows screenshot → detect interactive elements → view overlay + JSON.
Self-contained: downloads model weights from HF Hub automatically.
"""
from __future__ import annotations

import json
from collections import Counter

import cv2
import gradio as gr
import numpy as np
from huggingface_hub import hf_hub_download
from ultralytics import YOLO

# Class names in model-output index order: a predicted class id `i` maps to
# CLASS_NAMES[i].  Must match the label set the checkpoint was trained with.
CLASS_NAMES = [
    "button", "textbox", "checkbox", "dropdown", "icon", "tab", "menu_item",
]

# Per-class box/banner colors for the overlay.  The overlay is drawn on a
# BGR image (see _draw_overlay), so these tuples are interpreted as BGR.
CLASS_COLORS = {
    "button": (255, 127, 0),
    "textbox": ( 0, 200, 0),
    "checkbox": ( 0, 127, 255),
    "dropdown": (200, 0, 200),
    "icon": ( 0, 150, 255),
    "tab": (255, 0, 100),
    "menu_item": (100, 255, 255),
}

# Download model weights from HF Hub on startup.
# NOTE(review): this runs at import time, so the Space blocks until the
# download finishes; huggingface_hub caches the file on subsequent starts.
_weights_path = hf_hub_download(
    repo_id="IndextDataLab/windows-ui-locator",
    filename="best.pt",
)
_model = YOLO(_weights_path)
def _draw_overlay(img_rgb: np.ndarray, results: list[dict]) -> np.ndarray:
    """Draw bounding boxes and class/score labels onto a copy of the image.

    Args:
        img_rgb: Screenshot as an RGB uint8 array — assumed (H, W, 3);
            TODO confirm against the gr.Image(type="numpy") producer.
        results: Detection dicts carrying "bbox" [x1, y1, x2, y2] in pixels,
            "type" (class name), and "score" (0..1 float).

    Returns:
        A new RGB array with the overlay drawn; the input is never mutated
        (cv2.cvtColor allocates a fresh array).
    """
    overlay = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
    for det in results:
        x1, y1, x2, y2 = det["bbox"]
        # Unknown classes fall back to a neutral grey.
        color = CLASS_COLORS.get(det["type"], (200, 200, 200))
        label = f"{det['type']} {det['score']:.0%}"
        cv2.rectangle(overlay, (x1, y1), (x2, y2), color, 2)
        (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        # Fix: for boxes near the top edge the label banner extended to
        # negative y, so the text was clipped off-image.  When there is no
        # room above the box, drop the banner just below the box's top edge.
        banner_top = y1 - th - 6
        if banner_top < 0:
            banner_top = y1
        cv2.rectangle(
            overlay,
            (x1, banner_top),
            (x1 + tw + 4, banner_top + th + 6),
            color,
            -1,  # filled banner behind the text
        )
        cv2.putText(
            overlay,
            label,
            (x1 + 2, banner_top + th + 2),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (255, 255, 255),
            1,
            cv2.LINE_AA,
        )
    return cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB)
| def detect( | |
| image: np.ndarray | None, | |
| conf: float, | |
| iou: float, | |
| class_filter: list[str], | |
| ) -> tuple[np.ndarray | None, str, str]: | |
| if image is None: | |
| return None, "Upload an image first.", "[]" | |
| preds = _model.predict( | |
| source=image, conf=conf, iou=iou, verbose=False, max_det=300, | |
| ) | |
| results = [] | |
| if preds and len(preds) > 0: | |
| boxes = preds[0].boxes | |
| if boxes is not None: | |
| xyxy = boxes.xyxy.cpu().numpy() | |
| confs = boxes.conf.cpu().numpy() | |
| clss = boxes.cls.cpu().numpy().astype(int) | |
| for i, (box, c, cls_id) in enumerate(zip(xyxy, confs, clss)): | |
| cls_name = CLASS_NAMES[cls_id] if cls_id < len(CLASS_NAMES) else f"class_{cls_id}" | |
| if class_filter and cls_name not in class_filter: | |
| continue | |
| results.append({ | |
| "id": i, | |
| "type": cls_name, | |
| "bbox": [int(box[0]), int(box[1]), int(box[2]), int(box[3])], | |
| "score": round(float(c), 4), | |
| "center": [int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)], | |
| }) | |
| overlay = _draw_overlay(image, results) | |
| counts = Counter(d["type"] for d in results) | |
| summary_parts = [f"**{len(results)} elements detected**"] | |
| for cls_name in sorted(counts): | |
| summary_parts.append(f"- {cls_name}: {counts[cls_name]}") | |
| return overlay, "\n".join(summary_parts), json.dumps(results, indent=2) | |
# --- Gradio UI -------------------------------------------------------------
# Built at import time so `demo` exists for both a local `python app.py` run
# and the HF Spaces launcher.  Nesting reconstructed from the mangled paste:
# left column = inputs/controls, right column = overlay/summary/JSON.
with gr.Blocks(title="Windows UI Element Detector") as demo:
    gr.Markdown(
        "# Windows UI Element Detector\n"
        "Upload a Windows screenshot to detect interactive UI elements "
        "(buttons, textboxes, checkboxes, dropdowns, icons, tabs, menu items).\n\n"
        "**Model:** YOLO11s | **Classes:** 7 | **Dataset:** 3 000 synthetic images"
    )
    with gr.Row():
        with gr.Column(scale=1):
            # Input side: screenshot upload plus detection controls.
            input_image = gr.Image(label="Screenshot", type="numpy")
            with gr.Row():
                conf_slider = gr.Slider(
                    minimum=0.05, maximum=0.95, value=0.3, step=0.05,
                    label="Confidence threshold",
                )
                iou_slider = gr.Slider(
                    minimum=0.1, maximum=0.9, value=0.5, step=0.05,
                    label="IoU threshold (NMS)",
                )
            # Empty selection means "keep all classes" (see detect()).
            class_filter = gr.CheckboxGroup(
                choices=CLASS_NAMES,
                label="Filter classes (empty = all)",
            )
            detect_btn = gr.Button("Detect", variant="primary")
        with gr.Column(scale=1):
            # Output side: annotated image, per-class summary, raw JSON.
            output_image = gr.Image(label="Detection overlay")
            summary_md = gr.Markdown(label="Summary")
            with gr.Accordion("JSON output", open=False):
                json_output = gr.Code(language="json", label="Detections JSON")
    # Wire the button to detect(); input/output order must match its signature
    # (image, conf, iou, class_filter) -> (overlay, summary, json).
    detect_btn.click(
        fn=detect,
        inputs=[input_image, conf_slider, iou_slider, class_filter],
        outputs=[output_image, summary_md, json_output],
    )
    gr.Markdown(
        "---\n"
        "MIT License | "
        "[GitHub](https://github.com/Indext-Data-Lab/windows-ui-synth) | "
        "YOLO11s + EasyOCR + rapidfuzz | "
        "Commission a similar tool or a fully integrated AI solution for your business -> "
        "[Visit indext.io](https://indext.io/) | "
        "[Connect on LinkedIn](https://www.linkedin.com/company/indext-data-lab/)"
    )

# HF Spaces imports `demo` directly; launch only when run as a script.
if __name__ == "__main__":
    demo.launch()