woofah committed on
Commit
67e1af1
·
verified ·
1 Parent(s): 4782480

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +146 -0
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Gradio demo for Local UI Locator — standalone HF Space version.

Upload a Windows screenshot → detect interactive elements → view overlay + JSON.
Self-contained: downloads model weights from HF Hub automatically.
"""

from __future__ import annotations

import json
from collections import Counter

import cv2
import gradio as gr
import numpy as np
from huggingface_hub import hf_hub_download
from ultralytics import YOLO

# Class names indexed by the integer class id emitted by the YOLO model.
CLASS_NAMES = [
    "button", "textbox", "checkbox", "dropdown", "icon", "tab", "menu_item",
]

# Per-class box colors. _draw_overlay draws on a BGR copy of the image,
# so these tuples are interpreted in OpenCV's BGR channel order.
CLASS_COLORS = {
    "button": (255, 127, 0),
    "textbox": ( 0, 200, 0),
    "checkbox": ( 0, 127, 255),
    "dropdown": (200, 0, 200),
    "icon": ( 0, 150, 255),
    "tab": (255, 0, 100),
    "menu_item": (100, 255, 255),
}

# Download model weights from HF Hub on startup.
# NOTE: runs at import time — the Space blocks until the weights are cached.
_weights_path = hf_hub_download(
    repo_id="IndextDataLab/windows-ui-locator",
    filename="best.pt",
)
_model = YOLO(_weights_path)
39
+
40
def _draw_overlay(img_rgb: np.ndarray, results: list[dict]) -> np.ndarray:
    """Render each detection's box and caption onto a copy of *img_rgb*.

    Drawing happens in OpenCV's BGR space; the annotated image is converted
    back to RGB before being returned. The input array is not modified.
    """
    canvas = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)

    for det in results:
        x1, y1, x2, y2 = det["bbox"]
        box_color = CLASS_COLORS.get(det["type"], (200, 200, 200))
        caption = f"{det['type']} {det['score']:.0%}"

        cv2.rectangle(canvas, (x1, y1), (x2, y2), box_color, 2)

        (text_w, text_h), _ = cv2.getTextSize(
            caption, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1
        )
        # Filled banner above the box keeps the white caption legible.
        cv2.rectangle(
            canvas, (x1, y1 - text_h - 6), (x1 + text_w + 4, y1), box_color, -1
        )
        cv2.putText(
            canvas,
            caption,
            (x1 + 2, y1 - 4),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (255, 255, 255),
            1,
            cv2.LINE_AA,
        )

    return cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
54
+
55
+
56
def detect(
    image: np.ndarray | None,
    conf: float,
    iou: float,
    class_filter: list[str],
) -> tuple[np.ndarray | None, str, str]:
    """Run the UI-element detector and package results for the Gradio UI.

    Args:
        image: RGB screenshot as a numpy array, or None if nothing uploaded.
        conf: confidence threshold forwarded to the model.
        iou: IoU threshold (NMS) forwarded to the model.
        class_filter: class names to keep; an empty list keeps everything.

    Returns:
        A (overlay image, markdown summary, detections-JSON string) triple.
    """
    if image is None:
        return None, "Upload an image first.", "[]"

    preds = _model.predict(
        source=image, conf=conf, iou=iou, verbose=False, max_det=300,
    )

    detections: list[dict] = []
    if preds:
        boxes = preds[0].boxes
        if boxes is not None:
            xyxy = boxes.xyxy.cpu().numpy()
            scores = boxes.conf.cpu().numpy()
            class_ids = boxes.cls.cpu().numpy().astype(int)

            for idx, (box, score, cid) in enumerate(zip(xyxy, scores, class_ids)):
                # Defensive fallback for class ids outside the known set.
                name = CLASS_NAMES[cid] if cid < len(CLASS_NAMES) else f"class_{cid}"
                if class_filter and name not in class_filter:
                    continue
                detections.append({
                    # "id" is the model's detection index, so filtered-out
                    # classes leave gaps rather than renumbering.
                    "id": idx,
                    "type": name,
                    "bbox": [int(box[0]), int(box[1]), int(box[2]), int(box[3])],
                    "score": round(float(score), 4),
                    "center": [int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)],
                })

    overlay = _draw_overlay(image, detections)

    type_counts = Counter(d["type"] for d in detections)
    summary_lines = [f"**{len(detections)} elements detected**"]
    summary_lines.extend(
        f"- {name}: {type_counts[name]}" for name in sorted(type_counts)
    )

    return overlay, "\n".join(summary_lines), json.dumps(detections, indent=2)
97
+
98
+
99
# Build the Gradio UI. Statement order inside the Blocks context determines
# the on-page layout, so components are declared top-to-bottom as rendered.
with gr.Blocks(title="Local UI Locator") as demo:
    gr.Markdown(
        "# Local UI Locator\n"
        "Upload a Windows screenshot to detect interactive UI elements "
        "(buttons, textboxes, checkboxes, dropdowns, icons, tabs, menu items).\n\n"
        "**Model:** YOLO11s &nbsp;|&nbsp; **Classes:** 7 &nbsp;|&nbsp; **Dataset:** 3 000 synthetic images"
    )

    with gr.Row():
        # Left column: inputs and tuning controls.
        with gr.Column(scale=1):
            input_image = gr.Image(label="Screenshot", type="numpy")
            with gr.Row():
                conf_slider = gr.Slider(
                    minimum=0.05, maximum=0.95, value=0.3, step=0.05,
                    label="Confidence threshold",
                )
                iou_slider = gr.Slider(
                    minimum=0.1, maximum=0.9, value=0.5, step=0.05,
                    label="IoU threshold (NMS)",
                )
            class_filter = gr.CheckboxGroup(
                choices=CLASS_NAMES,
                label="Filter classes (empty = all)",
            )
            detect_btn = gr.Button("Detect", variant="primary")

        # Right column: annotated image plus per-class summary.
        with gr.Column(scale=1):
            output_image = gr.Image(label="Detection overlay")
            summary_md = gr.Markdown(label="Summary")

    # Raw detections, collapsed by default.
    with gr.Accordion("JSON output", open=False):
        json_output = gr.Code(language="json", label="Detections JSON")

    # Wire the button to detect(); inputs/outputs are positional and must
    # match the detect() signature and return tuple order.
    detect_btn.click(
        fn=detect,
        inputs=[input_image, conf_slider, iou_slider, class_filter],
        outputs=[output_image, summary_md, json_output],
    )

    gr.Markdown(
        "---\n"
        "MIT License &nbsp;|&nbsp; "
        "[GitHub](https://github.com/wuekv97/windowsUIdetector) &nbsp;|&nbsp; "
        "YOLO11s + EasyOCR + rapidfuzz"
    )

if __name__ == "__main__":
    demo.launch()