woofah committed on
Commit
67e1af1
·
verified ·
1 Parent(s): 4782480

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +146 -0
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Gradio demo for Local UI Locator — standalone HF Space version.

Upload a Windows screenshot → detect interactive elements → view overlay + JSON.
Self-contained: downloads model weights from HF Hub automatically.
"""

from __future__ import annotations

import json
from collections import Counter

import cv2
import gradio as gr
import numpy as np
from huggingface_hub import hf_hub_download
from ultralytics import YOLO

# Class names indexed by the integer class id emitted by the YOLO model.
CLASS_NAMES = [
    "button", "textbox", "checkbox", "dropdown", "icon", "tab", "menu_item",
]

# Per-class box colors. _draw_overlay draws on a BGR copy of the image,
# so these tuples are interpreted in OpenCV's BGR channel order.
CLASS_COLORS = {
    "button": (255, 127, 0),
    "textbox": ( 0, 200, 0),
    "checkbox": ( 0, 127, 255),
    "dropdown": (200, 0, 200),
    "icon": ( 0, 150, 255),
    "tab": (255, 0, 100),
    "menu_item": (100, 255, 255),
}

# Download model weights from HF Hub on startup.
# NOTE: runs at import time — the Space blocks until the weights are cached.
_weights_path = hf_hub_download(
    repo_id="IndextDataLab/windows-ui-locator",
    filename="best.pt",
)
_model = YOLO(_weights_path)
39
+
40
def _draw_overlay(img_rgb: np.ndarray, results: list[dict]) -> np.ndarray:
    """Render each detection's box and caption onto a copy of *img_rgb*.

    Drawing happens in OpenCV's BGR space; the annotated image is converted
    back to RGB before being returned. The input array is not modified.
    """
    canvas = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)

    for det in results:
        x1, y1, x2, y2 = det["bbox"]
        box_color = CLASS_COLORS.get(det["type"], (200, 200, 200))
        caption = f"{det['type']} {det['score']:.0%}"

        cv2.rectangle(canvas, (x1, y1), (x2, y2), box_color, 2)

        (text_w, text_h), _ = cv2.getTextSize(
            caption, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1
        )
        # Filled banner above the box keeps the white caption legible.
        cv2.rectangle(
            canvas, (x1, y1 - text_h - 6), (x1 + text_w + 4, y1), box_color, -1
        )
        cv2.putText(
            canvas,
            caption,
            (x1 + 2, y1 - 4),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (255, 255, 255),
            1,
            cv2.LINE_AA,
        )

    return cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
54
+
55
+
56
def detect(
    image: np.ndarray | None,
    conf: float,
    iou: float,
    class_filter: list[str],
) -> tuple[np.ndarray | None, str, str]:
    """Run the UI-element detector and package results for the Gradio UI.

    Args:
        image: RGB screenshot as a numpy array, or None if nothing uploaded.
        conf: confidence threshold forwarded to the model.
        iou: IoU threshold (NMS) forwarded to the model.
        class_filter: class names to keep; an empty list keeps everything.

    Returns:
        A (overlay image, markdown summary, detections-JSON string) triple.
    """
    if image is None:
        return None, "Upload an image first.", "[]"

    preds = _model.predict(
        source=image, conf=conf, iou=iou, verbose=False, max_det=300,
    )

    detections: list[dict] = []
    if preds:
        boxes = preds[0].boxes
        if boxes is not None:
            xyxy = boxes.xyxy.cpu().numpy()
            scores = boxes.conf.cpu().numpy()
            class_ids = boxes.cls.cpu().numpy().astype(int)

            for idx, (box, score, cid) in enumerate(zip(xyxy, scores, class_ids)):
                # Defensive fallback for class ids outside the known set.
                name = CLASS_NAMES[cid] if cid < len(CLASS_NAMES) else f"class_{cid}"
                if class_filter and name not in class_filter:
                    continue
                detections.append({
                    # "id" is the model's detection index, so filtered-out
                    # classes leave gaps rather than renumbering.
                    "id": idx,
                    "type": name,
                    "bbox": [int(box[0]), int(box[1]), int(box[2]), int(box[3])],
                    "score": round(float(score), 4),
                    "center": [int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)],
                })

    overlay = _draw_overlay(image, detections)

    type_counts = Counter(d["type"] for d in detections)
    summary_lines = [f"**{len(detections)} elements detected**"]
    summary_lines.extend(
        f"- {name}: {type_counts[name]}" for name in sorted(type_counts)
    )

    return overlay, "\n".join(summary_lines), json.dumps(detections, indent=2)
97
+
98
+
99
# Build the Gradio UI. Statement order inside the Blocks context determines
# the on-page layout, so components are declared top-to-bottom as rendered.
with gr.Blocks(title="Local UI Locator") as demo:
    gr.Markdown(
        "# Local UI Locator\n"
        "Upload a Windows screenshot to detect interactive UI elements "
        "(buttons, textboxes, checkboxes, dropdowns, icons, tabs, menu items).\n\n"
        "**Model:** YOLO11s &nbsp;|&nbsp; **Classes:** 7 &nbsp;|&nbsp; **Dataset:** 3 000 synthetic images"
    )

    with gr.Row():
        # Left column: inputs and tuning controls.
        with gr.Column(scale=1):
            input_image = gr.Image(label="Screenshot", type="numpy")
            with gr.Row():
                conf_slider = gr.Slider(
                    minimum=0.05, maximum=0.95, value=0.3, step=0.05,
                    label="Confidence threshold",
                )
                iou_slider = gr.Slider(
                    minimum=0.1, maximum=0.9, value=0.5, step=0.05,
                    label="IoU threshold (NMS)",
                )
            class_filter = gr.CheckboxGroup(
                choices=CLASS_NAMES,
                label="Filter classes (empty = all)",
            )
            detect_btn = gr.Button("Detect", variant="primary")

        # Right column: annotated image plus per-class summary.
        with gr.Column(scale=1):
            output_image = gr.Image(label="Detection overlay")
            summary_md = gr.Markdown(label="Summary")

    # Raw detections, collapsed by default.
    with gr.Accordion("JSON output", open=False):
        json_output = gr.Code(language="json", label="Detections JSON")

    # Wire the button to detect(); inputs/outputs are positional and must
    # match the detect() signature and return tuple order.
    detect_btn.click(
        fn=detect,
        inputs=[input_image, conf_slider, iou_slider, class_filter],
        outputs=[output_image, summary_md, json_output],
    )

    gr.Markdown(
        "---\n"
        "MIT License &nbsp;|&nbsp; "
        "[GitHub](https://github.com/wuekv97/windowsUIdetector) &nbsp;|&nbsp; "
        "YOLO11s + EasyOCR + rapidfuzz"
    )

if __name__ == "__main__":
    demo.launch()