colomboMk committed on
Commit
125c303
·
verified ·
1 Parent(s): 7b14232

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +407 -222
app.py CHANGED
@@ -1,76 +1,143 @@
1
  import os
 
 
 
 
 
 
 
 
2
  import cv2
 
 
3
  import numpy as np
4
  import gradio as gr
5
 
6
  from sahi import AutoDetectionModel
7
  from sahi.predict import get_sliced_prediction
8
 
9
- # Prova a importare ultralytics per il modello di segmentazione nativo (senza SAHI)
10
  try:
11
  from ultralytics import YOLO
12
- _ULTRALYTICS_AVAILABLE = True
13
  except Exception:
14
- _ULTRALYTICS_AVAILABLE = False
15
-
16
- # Soglia massima consentita per il lato della bbox (in pixel) per il modello con SAHI
17
- MAX_SIDE_PX = 70
18
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- def _draw_boxes_rgb(image_rgb: np.ndarray, result, target_class: str):
21
  """
22
- Disegna solo le bbox sul frame RGB (niente etichette testuali).
23
- - Evidenzia in rosso la classe target
24
- - Le altre classi in verde
25
- - Scarta le bbox con lato (max tra width e height) > MAX_SIDE_PX
26
- Restituisce (immagine_annotata_RGB, counts_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  """
28
- # Garantisci 3 canali
29
- if image_rgb.ndim == 2:
30
- image_rgb = cv2.cvtColor(image_rgb, cv2.COLOR_GRAY2RGB)
31
- elif image_rgb.shape[2] == 4:
32
- image_rgb = cv2.cvtColor(image_rgb, cv2.COLOR_RGBA2RGB)
33
-
34
  H, W = image_rgb.shape[:2]
 
 
35
 
36
- # OpenCV disegna in BGR
37
- vis_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
38
  target_count = 0
39
  total_count = 0
40
-
41
- object_predictions = getattr(result, "object_prediction_list", []) or []
42
 
43
  for item in object_predictions:
44
- # bbox
45
  try:
46
  x1, y1, x2, y2 = map(int, item.bbox.to_xyxy())
47
  except Exception:
48
  x1, y1 = int(getattr(item.bbox, "minx", 0)), int(getattr(item.bbox, "miny", 0))
49
  x2, y2 = int(getattr(item.bbox, "maxx", 0)), int(getattr(item.bbox, "maxy", 0))
50
 
51
- # Clamp ai bordi immagine
52
- x1 = max(0, min(x1, W - 1))
53
- y1 = max(0, min(y1, H - 1))
54
- x2 = max(0, min(x2, W - 1))
55
- y2 = max(0, min(y2, H - 1))
56
-
57
- # Normalizza coordinate in caso invertite
58
- if x2 < x1:
59
- x1, x2 = x2, x1
60
- if y2 < y1:
61
- y1, y2 = y2, y1
62
 
63
- # Scarta bbox non valide
64
  w = max(0, x2 - x1)
65
  h = max(0, y2 - y1)
66
  if w == 0 or h == 0:
67
  continue
68
-
69
- # Scarta le bbox con lato maggiore della soglia
70
  if max(w, h) > MAX_SIDE_PX:
71
  continue
72
 
73
- # Scarta bbox con area non positiva (per sicurezza)
74
  area = getattr(item.bbox, "area", w * h)
75
  try:
76
  area_val = float(area() if callable(area) else area)
@@ -80,60 +147,58 @@ def _draw_boxes_rgb(image_rgb: np.ndarray, result, target_class: str):
80
  continue
81
 
82
  cls = getattr(item.category, "name", "unknown")
83
- is_target = (cls == target_class)
 
 
 
84
 
85
- color_bgr = (0, 0, 255) if is_target else (0, 200, 0) # rosso per target, verde per altre
86
- cv2.rectangle(vis_bgr, (x1, y1), (x2, y2), color_bgr, 2)
87
- # Nessuna label testuale
88
 
89
  total_count += 1
90
  if is_target:
91
  target_count += 1
92
 
93
- vis_rgb = cv2.cvtColor(vis_bgr, cv2.COLOR_BGR2RGB)
94
- counts_text = f"target='{target_class}': {target_count} | totale: {total_count}"
95
- return vis_rgb, counts_text
96
-
 
 
 
97
 
98
- def _draw_segmentation_masks_rgb(image_rgb: np.ndarray, ulty_result, target_class: str, alpha: float = 0.45):
99
  """
100
- Disegna le maschere di segmentazione (niente etichette testuali).
101
- - Evidenzia in rosso la classe target
102
- - Le altre classi in verde
103
- - Restituisce (immagine_annotata_RGB, counts_text)
104
  """
105
- if image_rgb.ndim == 2:
106
- image_rgb = cv2.cvtColor(image_rgb, cv2.COLOR_GRAY2RGB)
107
- elif image_rgb.shape[2] == 4:
108
- image_rgb = cv2.cvtColor(image_rgb, cv2.COLOR_RGBA2RGB)
109
-
110
- vis_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
111
 
112
- # Estrarre info dal risultato Ultralytics
113
- r = ulty_result
114
  names = getattr(r, "names", None)
115
  boxes = getattr(r, "boxes", None)
116
  masks = getattr(r, "masks", None)
117
 
118
  if boxes is None or len(boxes) == 0:
119
- # Nessun oggetto
120
- return cv2.cvtColor(vis_bgr, cv2.COLOR_BGR2RGB), f"target='{target_class}': 0 | totale: 0"
121
 
122
- # Numero di istanze
123
  N = len(boxes)
124
-
125
- # Prepara maschere (se presenti)
126
  mask_data = None
127
  if masks is not None and getattr(masks, "data", None) is not None:
128
  try:
129
- mask_data = masks.data # torch.Tensor [N, H, W]
130
  except Exception:
131
  mask_data = None
132
 
133
  target_count = 0
134
  total_count = 0
 
135
 
136
- # Loop istanze
137
  for i in range(N):
138
  try:
139
  cls_idx = int(boxes.cls[i].item())
@@ -143,42 +208,44 @@ def _draw_segmentation_masks_rgb(image_rgb: np.ndarray, ulty_result, target_clas
143
  if isinstance(names, dict):
144
  cls_name = names.get(cls_idx, cls_name)
145
 
146
- is_target = (cls_name == target_class)
147
-
148
- color_bgr = (0, 0, 255) if is_target else (0, 200, 0) # rosso per target, verde per altre
149
 
150
- # Disegna mask se disponibile
151
  if mask_data is not None and i < len(mask_data):
152
  try:
153
  m = mask_data[i]
154
  m = m.detach().cpu().numpy()
155
- m = (m > 0.5).astype(np.uint8) # binarizza
156
- # Assicurare dimensioni identiche a immagine
157
- if m.shape[:2] != vis_bgr.shape[:2]:
158
- m = cv2.resize(m, (vis_bgr.shape[1], vis_bgr.shape[0]), interpolation=cv2.INTER_NEAREST)
159
 
160
- # Overlay colore
161
- overlay = np.zeros_like(vis_bgr, dtype=np.uint8)
162
- overlay[m.astype(bool)] = color_bgr
163
- vis_bgr = cv2.addWeighted(overlay, alpha, vis_bgr, 1 - alpha, 0)
164
 
165
- # Contorno
166
  cnts, _ = cv2.findContours(m, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
167
- cv2.drawContours(vis_bgr, cnts, -1, color_bgr, 2)
 
 
168
  except Exception:
169
- # fallback: disegna il bbox
170
  try:
171
  xyxy = boxes.xyxy[i].detach().cpu().numpy().astype(int)
172
  x1, y1, x2, y2 = map(int, xyxy)
173
- cv2.rectangle(vis_bgr, (x1, y1), (x2, y2), color_bgr, 2)
 
174
  except Exception:
175
  pass
176
  else:
177
- # Nessuna mask: disegna solo bbox
178
  try:
179
  xyxy = boxes.xyxy[i].detach().cpu().numpy().astype(int)
180
  x1, y1, x2, y2 = map(int, xyxy)
181
- cv2.rectangle(vis_bgr, (x1, y1), (x2, y2), color_bgr, 2)
 
182
  except Exception:
183
  pass
184
 
@@ -186,75 +253,42 @@ def _draw_segmentation_masks_rgb(image_rgb: np.ndarray, ulty_result, target_clas
186
  if is_target:
187
  target_count += 1
188
 
189
- vis_rgb = cv2.cvtColor(vis_bgr, cv2.COLOR_BGR2RGB)
190
- counts_text = f"target='{target_class}': {target_count} | totale: {total_count}"
191
- return vis_rgb, counts_text
192
-
193
-
194
- def infer_two_models(
195
- image: np.ndarray,
196
- weights_det_path: str,
197
- conf_det: float,
198
- slice_h: int,
199
- slice_w: int,
200
- overlap_h: float,
201
- overlap_w: float,
202
- device: str,
203
- target_class: str,
204
- weights_seg_path: str,
205
- conf_seg: float,
206
- ):
207
  """
208
- Esegue inferenza su una singola immagine con due modelli:
209
- - Modello A (Detection via SAHI): usa pesi YOLOv11 segment come detection, disegna solo bbox, filtra box con lato > MAX_SIDE_PX
210
- - Modello B (Segmentation nativo YOLO): nessun SAHI, disegna solo maschere (niente etichette)
211
- Restituisce 4 output: (img_det, counts_det, img_seg, counts_seg)
 
212
  """
213
- if image is None:
214
- raise gr.Error("Devi caricare un'immagine.")
215
-
216
- if not weights_det_path or not os.path.exists(weights_det_path):
217
- raise gr.Error(f"File pesi (Detection/SAHI) non trovato: {weights_det_path}")
218
-
219
- if not weights_seg_path or not os.path.exists(weights_seg_path):
220
- raise gr.Error(f"File pesi (Segmentation) non trovato: {weights_seg_path}")
221
-
222
- if not _ULTRALYTICS_AVAILABLE:
223
- raise gr.Error("Ultralytics non è installato per il modello di segmentazione. Installa con: pip install ultralytics")
224
-
225
- image_rgb = image.copy()
226
- model_type = "yolov11"
227
-
228
- # Scelta automatica device se 'auto'
229
- chosen_device = device
230
- if device == "auto":
231
- try:
232
- import torch
233
- chosen_device = "cuda:0" if torch.cuda.is_available() else "cpu"
234
- except Exception:
235
- chosen_device = "cpu"
236
-
237
- # =========================
238
- # Modello A: Detection con SAHI (boxes only)
239
- # =========================
240
- try:
241
- detection_model = AutoDetectionModel.from_pretrained(
242
- model_type=model_type,
243
- model_path=weights_det_path,
244
- confidence_threshold=conf_det,
245
- device=chosen_device,
246
- )
247
- except Exception:
248
- detection_model = AutoDetectionModel.from_pretrained(
249
- model_type=model_type,
250
- model_path=weights_det_path,
251
- confidence_threshold=conf_det,
252
- device="cpu",
253
- )
254
-
255
- sahi_result = get_sliced_prediction(
256
  image_rgb,
257
- detection_model,
258
  slice_height=int(slice_h),
259
  slice_width=int(slice_w),
260
  overlap_height_ratio=float(overlap_h),
@@ -263,106 +297,257 @@ def infer_two_models(
263
  verbose=0,
264
  )
265
 
266
- det_vis_rgb, det_counts_text = _draw_boxes_rgb(image_rgb, sahi_result, target_class)
267
 
268
- # =========================
269
- # Modello B: YOLO Segmentation nativo (no SAHI)
270
- # =========================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  try:
272
- seg_model = YOLO(weights_seg_path)
273
- # Nota: Ultralytics gestisce internamente il device; possiamo passarlo qui
274
- # Se chosen_device è 'cpu' o 'cuda:0'
275
- # Alcune versioni usano 'device' in predict(), altre in load/attr; .predict supporta device
276
- seg_results = seg_model.predict(
277
- source=image_rgb,
278
- conf=conf_seg,
279
- device=chosen_device,
280
- verbose=False,
281
- )
282
- # Prendi il primo risultato
283
  r0 = seg_results[0] if isinstance(seg_results, (list, tuple)) else seg_results
284
  except Exception as e:
285
- raise gr.Error(f"Errore durante l'inferenza del modello di segmentazione: {e}")
286
 
287
- seg_vis_rgb, seg_counts_text = _draw_segmentation_masks_rgb(image_rgb, r0, target_class)
 
 
288
 
289
- return det_vis_rgb, det_counts_text, seg_vis_rgb, seg_counts_text
 
 
290
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
 
292
  def build_app():
293
- with gr.Blocks(title="Berries counting and bunches segmentation - Owl-Nest") as demo:
294
  gr.Markdown(
295
- "- Carica un'immagine e lancia l'inferenza con due modelli YOLO.\n"
296
- "- Modello A dedicato al rilevamento e conteggio di acini.\n"
297
- "- Modello B dedicato alla segmentazione di grappoli."
 
 
298
  )
299
 
 
 
300
  with gr.Row():
301
- with gr.Column():
302
  img_in = gr.Image(label="Immagine", type="numpy")
303
-
304
- gr.Markdown("### Pesi modelli")
305
- weights_det = gr.Textbox(
306
- label="Percorso pesi Modello A",
307
- value="weights/berry.pt",
308
- placeholder="es. weights/best.pt",
309
- )
310
- weights_seg = gr.Textbox(
311
- label="Percorso pesi Modello B",
312
- value="weights/bunch.pt",
313
- placeholder="es. weights/seg.pt",
314
- )
315
-
316
- target = gr.Textbox(label="Classe target", value="berry")
317
-
318
- gr.Markdown("### Parametri modello A")
319
- with gr.Row():
320
- conf_det = gr.Slider(0.0, 1.0, value=0.35, step=0.01, label="Confidence (A)")
321
- device = gr.Dropdown(
322
- ["auto", "cuda:0", "cpu"],
323
- value="auto",
324
- label="Device",
325
  )
326
 
327
  with gr.Row():
328
- slice_h = gr.Slider(64, 2048, value=640, step=32, label="Slice H (A)")
329
- slice_w = gr.Slider(64, 2048, value=640, step=32, label="Slice W (A)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
 
331
  with gr.Row():
332
- overlap_h = gr.Slider(0.0, 0.9, value=0.10, step=0.01, label="Overlap H ratio (A)")
333
- overlap_w = gr.Slider(0.0, 0.9, value=0.10, step=0.01, label="Overlap W ratio (A)")
334
-
335
- gr.Markdown("### Parametri modello B")
336
- conf_seg = gr.Slider(0.0, 1.0, value=0.35, step=0.01, label="Confidence (B)")
337
 
338
- run_btn = gr.Button("Esegui inferenza", variant="primary")
 
 
 
339
 
340
- with gr.Column():
341
- gr.Markdown("### Risultato Modello A")
342
- img_out_det = gr.Image(label="Detections (solo bbox)", type="numpy")
343
- counts_out_det = gr.Textbox(label="Conteggi (A)", interactive=False)
 
 
 
 
 
 
 
 
344
 
345
- gr.Markdown("### Risultato Modello B")
346
- img_out_seg = gr.Image(label="Segmentazione (maschere)", type="numpy")
347
- counts_out_seg = gr.Textbox(label="Conteggi (B)", interactive=False)
 
 
 
 
 
 
348
 
349
- run_btn.click(
350
- infer_two_models,
351
  inputs=[
352
- img_in,
353
- weights_det, conf_det,
354
- slice_h, slice_w, overlap_h, overlap_w,
355
- device,
356
- target,
357
- weights_seg, conf_seg
358
  ],
359
- outputs=[img_out_det, counts_out_det, img_out_seg, counts_out_seg],
360
  )
361
 
362
- return demo
 
 
 
 
363
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
 
365
  if __name__ == "__main__":
366
  demo = build_app()
367
- # Su Spaces non è necessario specificare server_name o share
368
  demo.launch()
 
import os

# Route all framework caches to /tmp so downloads don't fill the Space's
# persistent storage.
os.environ.setdefault("HF_HOME", "/tmp/hf_home")
os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/hf_home/transformers")
os.environ.setdefault("HF_HUB_CACHE", "/tmp/hf_home/hub")
os.environ.setdefault("TORCH_HOME", "/tmp/torch_home")
os.environ.setdefault("PIP_DISABLE_PIP_VERSION_CHECK", "1")

import cv2
import time
import shutil
import numpy as np
import gradio as gr

from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction

# Native segmentation requires ultralytics; degrade gracefully when absent.
try:
    from ultralytics import YOLO
    _ULTRA_OK = True
except Exception:
    _ULTRA_OK = False

# Configuration
MAX_SIDE_PX = 70          # max bbox side (px) accepted by model A's (SAHI) filter
SEG_DEFAULT_ALPHA = 0.45  # default fill opacity for segmentation masks

# Module-level caches so repeated clicks don't reload model weights.
_DET_MODEL_CACHE = {}  # key: (weights_path, device) -> AutoDetectionModel
_SEG_MODEL_CACHE = {}  # key: weights_path -> YOLO
34
+ def _ensure_rgb(img: np.ndarray) -> np.ndarray:
35
+ if img is None:
36
+ return None
37
+ if img.ndim == 2:
38
+ return cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
39
+ if img.shape[2] == 4:
40
+ return cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
41
+ return img
42
+
43
+ def _choose_device(user_choice: str) -> str:
44
+ if user_choice != "auto":
45
+ return user_choice
46
+ try:
47
+ import torch
48
+ return "cuda:0" if torch.cuda.is_available() else "cpu"
49
+ except Exception:
50
+ return "cpu"
51
 
52
def _get_det_model(weights_path: str, device: str, conf: float):
    """Return a cached SAHI ``AutoDetectionModel`` for the given weights/device.

    The model is loaded once per (weights, device) pair and reused across
    clicks; on a cache hit only the confidence threshold is refreshed
    (best effort). Loading first targets the requested device; if that
    fails (e.g. no CUDA), a CPU load is attempted and any error from the
    second attempt propagates.

    Raises:
        gr.Error: if the weights file does not exist.
    """
    if not os.path.exists(weights_path):
        raise gr.Error(f"Pesi detection non trovati: {weights_path}")

    cache_key = (weights_path, device)
    cached = _DET_MODEL_CACHE.get(cache_key)
    if cached is not None:
        # Refresh the threshold on the cached instance if the attribute exists.
        try:
            cached.confidence_threshold = float(conf)
        except Exception:
            pass
        return cached

    # SAHI's "yolov8" wrapper also loads Ultralytics v9/v11 checkpoints.
    load_kwargs = dict(
        model_type="yolov8",
        model_path=weights_path,
        confidence_threshold=conf,
    )
    try:
        model = AutoDetectionModel.from_pretrained(device=device, **load_kwargs)
    except Exception:
        # CPU fallback; a failure here is allowed to propagate.
        model = AutoDetectionModel.from_pretrained(device="cpu", **load_kwargs)
    _DET_MODEL_CACHE[cache_key] = model
    return model
85
+
86
def _get_seg_model(weights_path: str):
    """Return a cached Ultralytics YOLO segmentation model.

    The checkpoint is loaded on first use and memoized by path.

    Raises:
        gr.Error: if ultralytics is unavailable or the weights are missing.
    """
    if not _ULTRA_OK:
        raise gr.Error("Ultralytics non installato. Installa con: pip install ultralytics")
    if not os.path.exists(weights_path):
        raise gr.Error(f"Pesi segmentation non trovati: {weights_path}")
    if weights_path not in _SEG_MODEL_CACHE:
        _SEG_MODEL_CACHE[weights_path] = YOLO(weights_path)
    return _SEG_MODEL_CACHE[weights_path]
96
+
97
+ def _optimize_slicing_dims(H: int, W: int, slice_h: int, slice_w: int, overlap_h: float, overlap_w: float, auto_opt: bool):
98
+ if not auto_opt:
99
+ return int(slice_h), int(slice_w), float(overlap_h), float(overlap_w)
100
+ sh = min(int(slice_h), H)
101
+ sw = min(int(slice_w), W)
102
+ # If the image already fits in one slice, remove overlap to reduce work
103
+ oh = 0.0 if (H <= sh and W <= sw) else float(overlap_h)
104
+ ow = 0.0 if (H <= sh and W <= sw) else float(overlap_w)
105
+ return sh, sw, oh, ow
106
+
107
+ def _draw_boxes_overlay(image_rgb: np.ndarray, sahi_result, target_class: str, use_target: bool):
108
+ """
109
+ Returns overlay_rgb (H,W,3), alpha_mask (H,W) uint8, counts_text
110
+ Only draws rectangles (no labels). Filters boxes with max side > MAX_SIDE_PX.
111
  """
 
 
 
 
 
 
112
  H, W = image_rgb.shape[:2]
113
+ overlay = np.zeros((H, W, 3), dtype=np.uint8)
114
+ alpha = np.zeros((H, W), dtype=np.uint8)
115
 
 
 
116
  target_count = 0
117
  total_count = 0
118
+ object_predictions = getattr(sahi_result, "object_prediction_list", []) or []
 
119
 
120
  for item in object_predictions:
121
+ # parse bbox
122
  try:
123
  x1, y1, x2, y2 = map(int, item.bbox.to_xyxy())
124
  except Exception:
125
  x1, y1 = int(getattr(item.bbox, "minx", 0)), int(getattr(item.bbox, "miny", 0))
126
  x2, y2 = int(getattr(item.bbox, "maxx", 0)), int(getattr(item.bbox, "maxy", 0))
127
 
128
+ # clamp and normalize
129
+ x1 = max(0, min(x1, W - 1)); x2 = max(0, min(x2, W - 1))
130
+ y1 = max(0, min(y1, H - 1)); y2 = max(0, min(y2, H - 1))
131
+ if x2 < x1: x1, x2 = x2, x1
132
+ if y2 < y1: y1, y2 = y2, y1
 
 
 
 
 
 
133
 
 
134
  w = max(0, x2 - x1)
135
  h = max(0, y2 - y1)
136
  if w == 0 or h == 0:
137
  continue
 
 
138
  if max(w, h) > MAX_SIDE_PX:
139
  continue
140
 
 
141
  area = getattr(item.bbox, "area", w * h)
142
  try:
143
  area_val = float(area() if callable(area) else area)
 
147
  continue
148
 
149
  cls = getattr(item.category, "name", "unknown")
150
+ is_target = (cls == target_class) if use_target else False
151
+
152
+ # Colors in BGR for OpenCV, convert later when compositing
153
+ color_bgr = (0, 0, 255) if is_target and use_target else (0, 200, 0)
154
 
155
+ # Draw on overlay (BGR)
156
+ cv2.rectangle(overlay, (x1, y1), (x2, y2), color_bgr, 2)
157
+ cv2.rectangle(alpha, (x1, y1), (x2, y2), 255, 2)
158
 
159
  total_count += 1
160
  if is_target:
161
  target_count += 1
162
 
163
+ # Convert overlay BGR -> RGB
164
+ overlay_rgb = cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB)
165
+ if use_target:
166
+ counts = f"target='{target_class}': {target_count} | totale: {total_count}"
167
+ else:
168
+ counts = f"totale: {total_count}"
169
+ return overlay_rgb, alpha, counts
170
 
171
+ def _draw_seg_overlay(image_rgb: np.ndarray, yolo_result, target_class: str, use_target: bool, fill_alpha: float = SEG_DEFAULT_ALPHA):
172
  """
173
+ Returns overlay_rgb (H,W,3), alpha_mask (H,W) uint8, counts_text for segmentation
174
+ - Fills masks with color (red for target, green for others if target enabled; else green)
175
+ - Draws contour opaque
 
176
  """
177
+ H, W = image_rgb.shape[:2]
178
+ overlay_bgr = np.zeros((H, W, 3), dtype=np.uint8)
179
+ alpha = np.zeros((H, W), dtype=np.uint8)
 
 
 
180
 
181
+ r = yolo_result
 
182
  names = getattr(r, "names", None)
183
  boxes = getattr(r, "boxes", None)
184
  masks = getattr(r, "masks", None)
185
 
186
  if boxes is None or len(boxes) == 0:
187
+ counts = f"target='{target_class}': 0 | totale: 0" if use_target else "totale: 0"
188
+ return cv2.cvtColor(overlay_bgr, cv2.COLOR_BGR2RGB), alpha, counts
189
 
 
190
  N = len(boxes)
 
 
191
  mask_data = None
192
  if masks is not None and getattr(masks, "data", None) is not None:
193
  try:
194
+ mask_data = masks.data # torch.Tensor [N, H, W] (prob/mask)
195
  except Exception:
196
  mask_data = None
197
 
198
  target_count = 0
199
  total_count = 0
200
+ fa255 = int(max(0.0, min(1.0, float(fill_alpha))) * 255)
201
 
 
202
  for i in range(N):
203
  try:
204
  cls_idx = int(boxes.cls[i].item())
 
208
  if isinstance(names, dict):
209
  cls_name = names.get(cls_idx, cls_name)
210
 
211
+ is_target = (cls_name == target_class) if use_target else False
212
+ color_bgr = (0, 0, 255) if is_target and use_target else (0, 200, 0)
 
213
 
 
214
  if mask_data is not None and i < len(mask_data):
215
  try:
216
  m = mask_data[i]
217
  m = m.detach().cpu().numpy()
218
+ m = (m > 0.5).astype(np.uint8) # binary mask
219
+
220
+ if m.shape[:2] != (H, W):
221
+ m = cv2.resize(m, (W, H), interpolation=cv2.INTER_NEAREST)
222
 
223
+ # Fill color where mask is 1
224
+ overlay_bgr[m == 1] = color_bgr
225
+ # Alpha for fill
226
+ alpha[m == 1] = np.maximum(alpha[m == 1], fa255)
227
 
228
+ # Contours opaque
229
  cnts, _ = cv2.findContours(m, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
230
+ cv2.drawContours(overlay_bgr, cnts, -1, color_bgr, 2)
231
+ # Draw contour alpha to 255
232
+ cv2.drawContours(alpha, cnts, -1, 255, 2)
233
  except Exception:
234
+ # fallback to bbox
235
  try:
236
  xyxy = boxes.xyxy[i].detach().cpu().numpy().astype(int)
237
  x1, y1, x2, y2 = map(int, xyxy)
238
+ cv2.rectangle(overlay_bgr, (x1, y1), (x2, y2), color_bgr, 2)
239
+ cv2.rectangle(alpha, (x1, y1), (x2, y2), 255, 2)
240
  except Exception:
241
  pass
242
  else:
243
+ # No mask: draw bbox
244
  try:
245
  xyxy = boxes.xyxy[i].detach().cpu().numpy().astype(int)
246
  x1, y1, x2, y2 = map(int, xyxy)
247
+ cv2.rectangle(overlay_bgr, (x1, y1), (x2, y2), color_bgr, 2)
248
+ cv2.rectangle(alpha, (x1, y1), (x2, y2), 255, 2)
249
  except Exception:
250
  pass
251
 
 
253
  if is_target:
254
  target_count += 1
255
 
256
+ overlay_rgb = cv2.cvtColor(overlay_bgr, cv2.COLOR_BGR2RGB)
257
+ if use_target:
258
+ counts = f"target='{target_class}': {target_count} | totale: {total_count}"
259
+ else:
260
+ counts = f"totale: {total_count}"
261
+ return overlay_rgb, alpha, counts
262
+
263
+ def _composite_layers(base_rgb: np.ndarray, layers: list):
 
 
 
 
 
 
 
 
 
 
264
  """
265
+ layers: list of dicts with keys:
266
+ - 'overlay' : np.ndarray HxWx3 RGB
267
+ - 'alpha' : np.ndarray HxW uint8
268
+ - 'ts' : float (timestamp), to control stacking order (oldest first)
269
+ Newest layer should be on top: sort by ts ascending and apply in order.
270
  """
271
+ if base_rgb is None:
272
+ return None
273
+ result = base_rgb.astype(np.float32)
274
+
275
+ # sort by timestamp (oldest first)
276
+ layers_sorted = sorted([l for l in layers if l is not None], key=lambda d: d["ts"])
277
+ for layer in layers_sorted:
278
+ ov = layer["overlay"].astype(np.float32)
279
+ a = (layer["alpha"].astype(np.float32) / 255.0)[..., None] # HxWx1
280
+ if ov.shape[:2] != result.shape[:2]:
281
+ ov = cv2.resize(ov, (result.shape[1], result.shape[0]), interpolation=cv2.INTER_LINEAR)
282
+ a = cv2.resize(a, (result.shape[1], result.shape[0]), interpolation=cv2.INTER_LINEAR)[..., None]
283
+ # alpha blend only where a > 0
284
+ result = ov * a + result * (1.0 - a)
285
+
286
+ return np.clip(result, 0, 255).astype(np.uint8)
287
+
288
+ def _sahi_predict(image_rgb: np.ndarray, det_model, slice_h, slice_w, overlap_h, overlap_w):
289
+ return get_sliced_prediction(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  image_rgb,
291
+ det_model,
292
  slice_height=int(slice_h),
293
  slice_width=int(slice_w),
294
  overlap_height_ratio=float(overlap_h),
 
297
  verbose=0,
298
  )
299
 
300
+ # Gradio callables
301
 
302
def on_image_upload(image, state):
    """Reset both overlays whenever a new image is loaded.

    Returns ``(preview_image, fresh_state, det_counts, seg_counts)``;
    the counts textboxes are cleared.
    """
    fresh = {"base": None, "det": None, "seg": None, "det_counts": "", "seg_counts": ""}
    if image is None:
        return None, fresh, "", ""
    rgb = _ensure_rgb(image)
    fresh["base"] = rgb
    return rgb, fresh, "", ""
311
+
312
def run_det(
    image, state,
    weights_det_path, conf_det, slice_h, slice_w, overlap_h, overlap_w, device,
    target_class, use_target, auto_opt_slice
):
    """Run model A (SAHI sliced detection) and refresh only the 'det' layer.

    The final image is recomposed from both layers (det + seg) in
    chronological order, so the layer produced last sits on top.

    Raises:
        gr.Error: if no base image has been uploaded yet.
    """
    if state is None or state.get("base") is None:
        raise gr.Error("Carica prima un'immagine.")

    base_img = state["base"]
    img_h, img_w = base_img.shape[:2]

    model = _get_det_model(weights_det_path, _choose_device(device), conf_det)
    sl_h, sl_w, ov_h, ov_w = _optimize_slicing_dims(
        img_h, img_w, slice_h, slice_w, overlap_h, overlap_w, auto_opt_slice
    )
    prediction = _sahi_predict(base_img, model, sl_h, sl_w, ov_h, ov_w)

    layer_rgb, layer_alpha, counts = _draw_boxes_overlay(
        base_img, prediction, target_class, bool(use_target)
    )
    state["det"] = {"overlay": layer_rgb, "alpha": layer_alpha, "ts": time.time()}
    state["det_counts"] = counts

    composite = _composite_layers(base_img, [state["det"], state.get("seg")])
    return composite, state, state["det_counts"], state.get("seg_counts", "")
337
+
338
def run_seg(
    image, state,
    weights_seg_path, conf_seg, device,
    target_class, use_target, seg_alpha
):
    """Run model B (native YOLO segmentation) and refresh only the 'seg' layer.

    The final image is recomposed from both layers (det + seg) in
    chronological order, so the layer produced last sits on top.

    Raises:
        gr.Error: if no base image has been uploaded yet, or if the
            segmentation inference fails.
    """
    if state is None or state.get("base") is None:
        raise gr.Error("Carica prima un'immagine.")

    base_img = state["base"]
    model = _get_seg_model(weights_seg_path)
    try:
        # Ultralytics accepts the device directly in predict().
        raw = model.predict(
            source=base_img,
            conf=float(conf_seg),
            device=_choose_device(device),
            verbose=False,
        )
        first = raw[0] if isinstance(raw, (list, tuple)) else raw
    except Exception as e:
        raise gr.Error(f"Errore inferenza segmentation: {e}")

    layer_rgb, layer_alpha, counts = _draw_seg_overlay(
        base_img, first, target_class, bool(use_target), float(seg_alpha)
    )
    state["seg"] = {"overlay": layer_rgb, "alpha": layer_alpha, "ts": time.time()}
    state["seg_counts"] = counts

    composite = _composite_layers(base_img, [state.get("det"), state["seg"]])
    return composite, state, state.get("det_counts", ""), state["seg_counts"]
365
 
366
def clear_overlays(image, state):
    """Drop both overlays and show the bare base image again.

    Counts textboxes are cleared. With no base image loaded, a fresh
    empty state is returned instead.
    """
    if state is None or state.get("base") is None:
        fresh = {"base": None, "det": None, "seg": None, "det_counts": "", "seg_counts": ""}
        return None, fresh, "", ""
    state.update(det=None, seg=None, det_counts="", seg_counts="")
    return state["base"], state, "", ""
375
+
376
+ # Maintenance helpers
377
+
378
+ def _dir_size(path: str) -> int:
379
+ try:
380
+ total = 0
381
+ for root, _, files in os.walk(path):
382
+ for f in files:
383
+ fp = os.path.join(root, f)
384
+ try:
385
+ total += os.path.getsize(fp)
386
+ except Exception:
387
+ pass
388
+ return total
389
+ except Exception:
390
+ return 0
391
+
392
+ def _fmt_bytes(n: int) -> str:
393
+ for unit in ["B", "KB", "MB", "GB", "TB"]:
394
+ if n < 1024.0:
395
+ return f"{n:.1f} {unit}"
396
+ n /= 1024.0
397
+ return f"{n:.1f} PB"
398
+
399
def check_storage():
    """Report sizes of the known cache directories plus overall disk usage.

    Returns a multi-line string suitable for the maintenance textbox.
    """
    cache_dirs = [
        os.path.expanduser("~/.cache/huggingface/hub"),
        os.path.expanduser("~/.cache/torch"),
        os.path.expanduser("~/.cache/pip"),
        os.path.expanduser("~/.config/Ultralytics"),
        "/tmp/hf_home/hub",
        "/tmp/torch_home",
    ]
    report = [
        f"{d}: {_fmt_bytes(_dir_size(d) if os.path.exists(d) else 0)}"
        for d in cache_dirs
    ]
    try:
        total, used, free = shutil.disk_usage("/")
        disk_line = (
            f"Disk usage: used {_fmt_bytes(used)} / total {_fmt_bytes(total)} "
            f"(free {_fmt_bytes(free)})"
        )
    except Exception:
        disk_line = "Disk usage: n/a"
    return "Cache sizes:\n" + "\n".join(report) + "\n" + disk_line
421
+
422
def clean_caches():
    """Delete the known cache directories to reclaim disk space.

    Deletion is best effort: errors are ignored and never interrupt the
    sweep. Returns a log listing the paths that existed and were removed.
    """
    candidates = [
        os.path.expanduser("~/.cache/huggingface/hub"),
        os.path.expanduser("~/.cache/torch"),
        os.path.expanduser("~/.cache/pip"),
        os.path.expanduser("~/.config/Ultralytics"),
        "/tmp/hf_home",
        "/tmp/torch_home",
    ]
    removed = []
    for path in candidates:
        try:
            if not os.path.exists(path):
                continue
            shutil.rmtree(path, ignore_errors=True)
            removed.append(path)
        except Exception:
            continue
    return "Removed:\n" + ("\n".join(removed) if removed else "(none)")
440
 
441
  def build_app():
442
+ with gr.Blocks(title="YOLOv11 SAHI Detection + YOLO Segmentation (dual overlays)") as demo:
443
  gr.Markdown(
444
+ "## Doppia inferenza su stessa immagine, overlay combinati\n"
445
+ "- Modello A: SAHI detection (usa pesi YOLOv11 seg come detection) — solo bbox, filtro lato > 70px.\n"
446
+ "- Modello B: YOLO segmentation nativo maschere riempite + contorno.\n"
447
+ "- Esegui i modelli con pulsanti separati; gli overlay si accumulano sull'immagine base (nuovo overlay sopra).\n"
448
+ "- Opzionale: disabilita l'evidenziazione della classe target se non ti serve."
449
  )
450
 
451
+ state = gr.State({"base": None, "det": None, "seg": None, "det_counts": "", "seg_counts": ""})
452
+
453
  with gr.Row():
454
+ with gr.Column(scale=1):
455
  img_in = gr.Image(label="Immagine", type="numpy")
456
+ with gr.Accordion("Pesi modelli", open=True):
457
+ weights_det = gr.Textbox(
458
+ label="Pesi Modello A (Detection + SAHI, .pt)",
459
+ value="weights/best.pt",
460
+ )
461
+ weights_seg = gr.Textbox(
462
+ label="Pesi Modello B (Segmentation, .pt)",
463
+ value="weights/seg.pt",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
464
  )
465
 
466
  with gr.Row():
467
+ target = gr.Textbox(label="Classe target", value="berry")
468
+ use_target = gr.Checkbox(label="Usa classe target", value=True)
469
+
470
+ with gr.Tab("Modello A — SAHI Detection"):
471
+ with gr.Row():
472
+ conf_det = gr.Slider(0.0, 1.0, value=0.35, step=0.01, label="Confidence (A)")
473
+ device_a = gr.Dropdown(["auto", "cuda:0", "cpu"], value="auto", label="Device")
474
+ with gr.Row():
475
+ slice_h = gr.Slider(64, 2048, value=640, step=32, label="Slice H (A)")
476
+ slice_w = gr.Slider(64, 2048, value=640, step=32, label="Slice W (A)")
477
+ with gr.Row():
478
+ overlap_h = gr.Slider(0.0, 0.9, value=0.10, step=0.01, label="Overlap H (A)")
479
+ overlap_w = gr.Slider(0.0, 0.9, value=0.10, step=0.01, label="Overlap W (A)")
480
+ auto_opt_slice = gr.Checkbox(label="Ottimizza slicing su immagini piccole", value=True)
481
+ btn_det = gr.Button("Esegui Modello A (SAHI)")
482
+
483
+ with gr.Tab("Modello B — YOLO Segmentation"):
484
+ with gr.Row():
485
+ conf_seg = gr.Slider(0.0, 1.0, value=0.35, step=0.01, label="Confidence (B)")
486
+ seg_alpha = gr.Slider(0.0, 1.0, value=SEG_DEFAULT_ALPHA, step=0.05, label="Alpha maschere (B)")
487
+ device_b = gr.Dropdown(["auto", "cuda:0", "cpu"], value="auto", label="Device")
488
+ btn_seg = gr.Button("Esegui Modello B (Seg)")
489
 
490
  with gr.Row():
491
+ btn_clear = gr.Button("Pulisci overlay", variant="secondary")
 
 
 
 
492
 
493
+ with gr.Accordion("Manutenzione spazio", open=False):
494
+ btn_check = gr.Button("Controlla storage")
495
+ btn_clean = gr.Button("Pulisci cache")
496
+ maint_out = gr.Textbox(label="Log manutenzione", interactive=False)
497
 
498
+ with gr.Column(scale=2):
499
+ img_out = gr.Image(label="Risultato combinato", type="numpy")
500
+ with gr.Row():
501
+ counts_out_det = gr.Textbox(label="Conteggi (A)", interactive=False)
502
+ counts_out_seg = gr.Textbox(label="Conteggi (B)", interactive=False)
503
+
504
+ # Wiring
505
+ img_in.change(
506
+ on_image_upload,
507
+ inputs=[img_in, state],
508
+ outputs=[img_out, state, counts_out_det, counts_out_seg],
509
+ )
510
 
511
+ btn_det.click(
512
+ run_det,
513
+ inputs=[
514
+ img_in, state,
515
+ weights_det, conf_det, slice_h, slice_w, overlap_h, overlap_w, device_a,
516
+ target, use_target, auto_opt_slice
517
+ ],
518
+ outputs=[img_out, state, counts_out_det, counts_out_seg],
519
+ )
520
 
521
+ btn_seg.click(
522
+ run_seg,
523
  inputs=[
524
+ img_in, state,
525
+ weights_seg, conf_seg, device_b,
526
+ target, use_target, seg_alpha
 
 
 
527
  ],
528
+ outputs=[img_out, state, counts_out_det, counts_out_seg],
529
  )
530
 
531
+ btn_clear.click(
532
+ clear_overlays,
533
+ inputs=[img_in, state],
534
+ outputs=[img_out, state, counts_out_det, counts_out_seg],
535
+ )
536
 
537
+ btn_check.click(
538
+ check_storage,
539
+ inputs=[],
540
+ outputs=[maint_out],
541
+ )
542
+
543
+ btn_clean.click(
544
+ clean_caches,
545
+ inputs=[],
546
+ outputs=[maint_out],
547
+ )
548
+
549
+ return demo
550
 
551
  if __name__ == "__main__":
552
  demo = build_app()
 
553
  demo.launch()