colomboMk committed on
Commit
37bac90
·
verified ·
1 Parent(s): a77b572

Upload 2 files

Browse files

app and weights

Files changed (2) hide show
  1. app.py +207 -0
  2. best.pt +3 -0
app.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ #"/home/mclm/phd/best.pt"
3
+
4
+ import os
5
+ import cv2
6
+ import numpy as np
7
+ import gradio as gr
8
+
9
+ from sahi import AutoDetectionModel
10
+ from sahi.predict import get_sliced_prediction
11
+
12
# Maximum allowed bounding-box side length in pixels; boxes with a longer side are discarded.
MAX_SIDE_PX = 70
14
+
15
+
16
def _draw_boxes_rgb(
    image_rgb: np.ndarray,
    result,
    target_class: str,
    max_side_px: int | None = None,
):
    """
    Draw only the bounding boxes on an RGB frame (no text labels).

    - The target class is highlighted in red; all other classes in green.
    - Boxes whose longer side (max of width and height) exceeds the threshold
      are discarded.

    Args:
        image_rgb: Input frame as a numpy array (grayscale and RGBA inputs are
            converted to 3-channel RGB).
        result: SAHI prediction result exposing ``object_prediction_list``.
        target_class: Class name to highlight in red and count separately.
        max_side_px: Optional per-call override for the maximum allowed box
            side in pixels; defaults to the module-level ``MAX_SIDE_PX``.

    Returns:
        Tuple ``(annotated_rgb_image, counts_text)``.
    """
    side_limit = MAX_SIDE_PX if max_side_px is None else max_side_px

    # Guarantee 3 channels.
    if image_rgb.ndim == 2:
        image_rgb = cv2.cvtColor(image_rgb, cv2.COLOR_GRAY2RGB)
    elif image_rgb.shape[2] == 4:
        image_rgb = cv2.cvtColor(image_rgb, cv2.COLOR_RGBA2RGB)

    H, W = image_rgb.shape[:2]

    # OpenCV draws in BGR.
    vis_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
    target_count = 0
    total_count = 0

    object_predictions = getattr(result, "object_prediction_list", []) or []

    for item in object_predictions:
        # Extract the box; fall back to min/max attributes if to_xyxy() fails.
        try:
            x1, y1, x2, y2 = map(int, item.bbox.to_xyxy())
        except Exception:
            x1, y1 = int(getattr(item.bbox, "minx", 0)), int(getattr(item.bbox, "miny", 0))
            x2, y2 = int(getattr(item.bbox, "maxx", 0)), int(getattr(item.bbox, "maxy", 0))

        # Clamp to the image bounds.
        x1 = max(0, min(x1, W - 1))
        y1 = max(0, min(y1, H - 1))
        x2 = max(0, min(x2, W - 1))
        y2 = max(0, min(y2, H - 1))

        # Normalize coordinates in case they are inverted.
        if x2 < x1:
            x1, x2 = x2, x1
        if y2 < y1:
            y1, y2 = y2, y1

        # Discard degenerate boxes.
        w = max(0, x2 - x1)
        h = max(0, y2 - y1)
        if w == 0 or h == 0:
            continue

        # Discard boxes whose longer side exceeds the threshold.
        if max(w, h) > side_limit:
            continue

        # Discard boxes with non-positive area (defensive; `area` may be a
        # method or a plain attribute depending on the SAHI version).
        area = getattr(item.bbox, "area", w * h)
        try:
            area_val = float(area() if callable(area) else area)
        except Exception:
            area_val = float(w * h)
        if area_val <= 0:
            continue

        cls = getattr(item.category, "name", "unknown")
        is_target = (cls == target_class)

        color_bgr = (0, 0, 255) if is_target else (0, 200, 0)  # red for target, green otherwise
        cv2.rectangle(vis_bgr, (x1, y1), (x2, y2), color_bgr, 2)
        # No text labels by design.

        total_count += 1
        if is_target:
            target_count += 1

    vis_rgb = cv2.cvtColor(vis_bgr, cv2.COLOR_BGR2RGB)
    counts_text = f"target='{target_class}': {target_count} | totale: {total_count}"
    return vis_rgb, counts_text
92
+
93
+
94
def infer_single_image(
    image: np.ndarray,
    weights_path: str,
    conf: float = 0.35,
    slice_h: int = 640,
    slice_w: int = 640,
    overlap_h: float = 0.10,
    overlap_w: float = 0.10,
    device: str = "cuda:0",
    target_class: str = "berry",
):
    """
    Run inference on a single image using SAHI for slicing/merging, with
    YOLOv11 instance-segmentation weights treated as plain detection:

    - SAHI uses ``AutoDetectionModel`` with the 'yolov8' Ultralytics wrapper.
    - Masks are ignored; only the bounding boxes are used.
    - Only boxes are drawn (no labels) and the count for the target class is
      reported.

    Args:
        image: Input image as a numpy array (Gradio "numpy" format).
        weights_path: Path to the Ultralytics ``.pt`` weights file.
        conf: Confidence threshold for detections.
        slice_h / slice_w: SAHI slice height/width in pixels.
        overlap_h / overlap_w: SAHI slice overlap ratios.
        device: Preferred inference device (falls back to CPU on failure).
        target_class: Class name to highlight and count.

    Returns:
        Tuple ``(annotated_rgb_image, counts_text)``.

    Raises:
        gr.Error: If no image is provided or the weights file is missing.
    """
    if image is None:
        raise gr.Error("Devi caricare un'immagine.")

    if not weights_path or not os.path.exists(weights_path):
        raise gr.Error(f"File pesi non trovato: {weights_path}")

    image_rgb = image.copy()

    # SAHI accepts only detection models; use the Ultralytics wrapper.
    model_type = "yolov8"

    def _load_model(dev: str):
        # One-line helper: build the SAHI detection model on the given device.
        return AutoDetectionModel.from_pretrained(
            model_type=model_type,
            model_path=weights_path,
            confidence_threshold=conf,
            device=dev,
        )

    try:
        detection_model = _load_model(device)
    except Exception:
        # Fall back to CPU only when a different device was requested;
        # retrying on CPU after a CPU failure would just mask the real error
        # (e.g. corrupt weights).
        if device == "cpu":
            raise
        detection_model = _load_model("cpu")

    result = get_sliced_prediction(
        image_rgb,
        detection_model,
        slice_height=int(slice_h),
        slice_width=int(slice_w),
        overlap_height_ratio=float(overlap_h),
        overlap_width_ratio=float(overlap_w),
        postprocess_class_agnostic=False,
        verbose=0,
    )

    vis_rgb, counts_text = _draw_boxes_rgb(image_rgb, result, target_class)
    return vis_rgb, counts_text
153
+
154
+
155
def build_app():
    """Assemble and return the Gradio Blocks UI for sliced YOLO inference."""
    with gr.Blocks(title="YOLOv11 SEG as Detection + SAHI - Owl-Nest") as demo:
        gr.Markdown(
            "## YOLOv11 Instance Segmentation usata come Detection con SAHI\n"
            "- Carica un'immagine e lancia l'inferenza con pesi .pt Ultralytics (YOLOv11 segment).\n"
            "- SAHI effettua slicing/merging ma tratta il modello come detection: vengono usati i bounding box (le mask sono ignorate).\n"
            "- Plot: solo box, senza etichette; scarta box con lato > 70 px."
        )

        with gr.Row():
            with gr.Column():
                # Input column: image, weights path, target class and tuning knobs.
                input_image = gr.Image(label="Immagine", type="numpy")
                weights_box = gr.Textbox(
                    label="Percorso pesi (.pt)",
                    value="/home/mclm/phd/best.pt",
                    placeholder="es. src/scripts/best.pt",
                )
                target_box = gr.Textbox(label="Classe target", value="berry")

                with gr.Row():
                    conf_slider = gr.Slider(0.0, 1.0, value=0.35, step=0.01, label="Confidence")
                    device_dd = gr.Dropdown(
                        ["cuda:0", "cpu"],
                        value="cuda:0",
                        label="Device",
                    )

                with gr.Row():
                    slice_h_slider = gr.Slider(64, 2048, value=640, step=32, label="Slice H")
                    slice_w_slider = gr.Slider(64, 2048, value=640, step=32, label="Slice W")

                with gr.Row():
                    overlap_h_slider = gr.Slider(0.0, 0.9, value=0.10, step=0.01, label="Overlap H ratio")
                    overlap_w_slider = gr.Slider(0.0, 0.9, value=0.10, step=0.01, label="Overlap W ratio")

                run_button = gr.Button("Esegui inferenza", variant="primary")

            with gr.Column():
                # Output column: annotated image plus the per-class counts.
                output_image = gr.Image(label="Risultato", type="numpy")
                counts_box = gr.Textbox(label="Conteggi", interactive=False)

        # Wire the button to the inference entry point.
        run_button.click(
            infer_single_image,
            inputs=[
                input_image,
                weights_box,
                conf_slider,
                slice_h_slider,
                slice_w_slider,
                overlap_h_slider,
                overlap_w_slider,
                device_dd,
                target_box,
            ],
            outputs=[output_image, counts_box],
        )

    return demo
203
+
204
+
205
+ if __name__ == "__main__":
206
+ app = build_app()
207
+ app.launch(server_name="0.0.0.0", server_port=7860, inbrowser=False, show_api=False)
best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd1ebf826a25ef9bdf4ae299fc7a0f398f2688c9e00fc045a4cd50d2e5db480f
3
+ size 5487827