Spaces:

Prospecto
/

grape_detection

Runtime error

App Files Files Community

colomboMk committed on Oct 14, 2025

Commit

a1d7bb7

verified ·

1 Parent(s): 715a3cf

Create app.py

Browse files

Files changed (1) hide show

app.py +368 -0

app.py ADDED Viewed

	@@ -0,0 +1,368 @@

+import os
+import cv2
+import numpy as np
+import gradio as gr
+from sahi import AutoDetectionModel
+from sahi.predict import get_sliced_prediction
+# Try to import ultralytics for the native segmentation model (the one run without SAHI).
+# The flag records whether the import worked; infer_two_models checks it before using YOLO.
+try:
+    from ultralytics import YOLO
+    _ULTRALYTICS_AVAILABLE = True
+except Exception:
+    _ULTRALYTICS_AVAILABLE = False
+# Maximum allowed bbox side (in pixels) for the model run through SAHI;
+# _draw_boxes_rgb skips any box whose width or height exceeds it.
+MAX_SIDE_PX = 70
+def _draw_boxes_rgb(image_rgb: np.ndarray, result, target_class: str):
+    """
+    Disegna solo le bbox sul frame RGB (niente etichette testuali).
+    - Evidenzia in rosso la classe target
+    - Le altre classi in verde
+    - Scarta le bbox con lato (max tra width e height) > MAX_SIDE_PX
+    Restituisce (immagine_annotata_RGB, counts_text)
+    """
+    # Garantisci 3 canali
+    if image_rgb.ndim == 2:
+        image_rgb = cv2.cvtColor(image_rgb, cv2.COLOR_GRAY2RGB)
+    elif image_rgb.shape[2] == 4:
+        image_rgb = cv2.cvtColor(image_rgb, cv2.COLOR_RGBA2RGB)
+    H, W = image_rgb.shape[:2]
+    # OpenCV disegna in BGR
+    vis_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
+    target_count = 0
+    total_count = 0
+    object_predictions = getattr(result, "object_prediction_list", []) or []
+    for item in object_predictions:
+        # bbox
+        try:
+            x1, y1, x2, y2 = map(int, item.bbox.to_xyxy())
+        except Exception:
+            x1, y1 = int(getattr(item.bbox, "minx", 0)), int(getattr(item.bbox, "miny", 0))
+            x2, y2 = int(getattr(item.bbox, "maxx", 0)), int(getattr(item.bbox, "maxy", 0))
+        # Clamp ai bordi immagine
+        x1 = max(0, min(x1, W - 1))
+        y1 = max(0, min(y1, H - 1))
+        x2 = max(0, min(x2, W - 1))
+        y2 = max(0, min(y2, H - 1))
+        # Normalizza coordinate in caso invertite
+        if x2 < x1:
+            x1, x2 = x2, x1
+        if y2 < y1:
+            y1, y2 = y2, y1
+        # Scarta bbox non valide
+        w = max(0, x2 - x1)
+        h = max(0, y2 - y1)
+        if w == 0 or h == 0:
+            continue
+        # Scarta le bbox con lato maggiore della soglia
+        if max(w, h) > MAX_SIDE_PX:
+            continue
+        # Scarta bbox con area non positiva (per sicurezza)
+        area = getattr(item.bbox, "area", w * h)
+        try:
+            area_val = float(area() if callable(area) else area)
+        except Exception:
+            area_val = float(w * h)
+        if area_val <= 0:
+            continue
+        cls = getattr(item.category, "name", "unknown")
+        is_target = (cls == target_class)
+        color_bgr = (0, 0, 255) if is_target else (0, 200, 0)  # rosso per target, verde per altre
+        cv2.rectangle(vis_bgr, (x1, y1), (x2, y2), color_bgr, 2)
+        # Nessuna label testuale
+        total_count += 1
+        if is_target:
+            target_count += 1
+    vis_rgb = cv2.cvtColor(vis_bgr, cv2.COLOR_BGR2RGB)
+    counts_text = f"target='{target_class}': {target_count} | totale: {total_count}"
+    return vis_rgb, counts_text
+def _draw_segmentation_masks_rgb(image_rgb: np.ndarray, ulty_result, target_class: str, alpha: float = 0.45):
+    """
+    Disegna le maschere di segmentazione (niente etichette testuali).
+    - Evidenzia in rosso la classe target
+    - Le altre classi in verde
+    - Restituisce (immagine_annotata_RGB, counts_text)
+    """
+    if image_rgb.ndim == 2:
+        image_rgb = cv2.cvtColor(image_rgb, cv2.COLOR_GRAY2RGB)
+    elif image_rgb.shape[2] == 4:
+        image_rgb = cv2.cvtColor(image_rgb, cv2.COLOR_RGBA2RGB)
+    vis_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
+    # Estrarre info dal risultato Ultralytics
+    r = ulty_result
+    names = getattr(r, "names", None)
+    boxes = getattr(r, "boxes", None)
+    masks = getattr(r, "masks", None)
+    if boxes is None or len(boxes) == 0:
+        # Nessun oggetto
+        return cv2.cvtColor(vis_bgr, cv2.COLOR_BGR2RGB), f"target='{target_class}': 0 | totale: 0"
+    # Numero di istanze
+    N = len(boxes)
+    # Prepara maschere (se presenti)
+    mask_data = None
+    if masks is not None and getattr(masks, "data", None) is not None:
+        try:
+            mask_data = masks.data  # torch.Tensor [N, H, W]
+        except Exception:
+            mask_data = None
+    target_count = 0
+    total_count = 0
+    # Loop istanze
+    for i in range(N):
+        try:
+            cls_idx = int(boxes.cls[i].item())
+        except Exception:
+            cls_idx = -1
+        cls_name = str(cls_idx)
+        if isinstance(names, dict):
+            cls_name = names.get(cls_idx, cls_name)
+        is_target = (cls_name == target_class)
+        color_bgr = (0, 0, 255) if is_target else (0, 200, 0)  # rosso per target, verde per altre
+        # Disegna mask se disponibile
+        if mask_data is not None and i < len(mask_data):
+            try:
+                m = mask_data[i]
+                m = m.detach().cpu().numpy()
+                m = (m > 0.5).astype(np.uint8)  # binarizza
+                # Assicurare dimensioni identiche a immagine
+                if m.shape[:2] != vis_bgr.shape[:2]:
+                    m = cv2.resize(m, (vis_bgr.shape[1], vis_bgr.shape[0]), interpolation=cv2.INTER_NEAREST)
+                # Overlay colore
+                overlay = np.zeros_like(vis_bgr, dtype=np.uint8)
+                overlay[m.astype(bool)] = color_bgr
+                vis_bgr = cv2.addWeighted(overlay, alpha, vis_bgr, 1 - alpha, 0)
+                # Contorno
+                cnts, _ = cv2.findContours(m, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+                cv2.drawContours(vis_bgr, cnts, -1, color_bgr, 2)
+            except Exception:
+                # fallback: disegna il bbox
+                try:
+                    xyxy = boxes.xyxy[i].detach().cpu().numpy().astype(int)
+                    x1, y1, x2, y2 = map(int, xyxy)
+                    cv2.rectangle(vis_bgr, (x1, y1), (x2, y2), color_bgr, 2)
+                except Exception:
+                    pass
+        else:
+            # Nessuna mask: disegna solo bbox
+            try:
+                xyxy = boxes.xyxy[i].detach().cpu().numpy().astype(int)
+                x1, y1, x2, y2 = map(int, xyxy)
+                cv2.rectangle(vis_bgr, (x1, y1), (x2, y2), color_bgr, 2)
+            except Exception:
+                pass
+        total_count += 1
+        if is_target:
+            target_count += 1
+    vis_rgb = cv2.cvtColor(vis_bgr, cv2.COLOR_BGR2RGB)
+    counts_text = f"target='{target_class}': {target_count} | totale: {total_count}"
+    return vis_rgb, counts_text
+def infer_two_models(
+    image: np.ndarray,
+    weights_det_path: str,
+    conf_det: float,
+    slice_h: int,
+    slice_w: int,
+    overlap_h: float,
+    overlap_w: float,
+    device: str,
+    target_class: str,
+    weights_seg_path: str,
+    conf_seg: float,
+):
+    """
+    Esegue inferenza su una singola immagine con due modelli:
+    - Modello A (Detection via SAHI): usa pesi YOLOv11 segment come detection, disegna solo bbox, filtra box con lato > MAX_SIDE_PX
+    - Modello B (Segmentation nativo YOLO): nessun SAHI, disegna solo maschere (niente etichette)
+    Restituisce 4 output: (img_det, counts_det, img_seg, counts_seg)
+    """
+    if image is None:
+        raise gr.Error("Devi caricare un'immagine.")
+    if not weights_det_path or not os.path.exists(weights_det_path):
+        raise gr.Error(f"File pesi (Detection/SAHI) non trovato: {weights_det_path}")
+    if not weights_seg_path or not os.path.exists(weights_seg_path):
+        raise gr.Error(f"File pesi (Segmentation) non trovato: {weights_seg_path}")
+    if not _ULTRALYTICS_AVAILABLE:
+        raise gr.Error("Ultralytics non è installato per il modello di segmentazione. Installa con: pip install ultralytics")
+    image_rgb = image.copy()
+    model_type = "yolov11"
+    # Scelta automatica device se 'auto'
+    chosen_device = device
+    if device == "auto":
+        try:
+            import torch
+            chosen_device = "cuda:0" if torch.cuda.is_available() else "cpu"
+        except Exception:
+            chosen_device = "cpu"
+    # =========================
+    # Modello A: Detection con SAHI (boxes only)
+    # =========================
+    try:
+        detection_model = AutoDetectionModel.from_pretrained(
+            model_type=model_type,
+            model_path=weights_det_path,
+            confidence_threshold=conf_det,
+            device=chosen_device,
+        )
+    except Exception:
+        detection_model = AutoDetectionModel.from_pretrained(
+            model_type=model_type,
+            model_path=weights_det_path,
+            confidence_threshold=conf_det,
+            device="cpu",
+        )
+    sahi_result = get_sliced_prediction(
+        image_rgb,
+        detection_model,
+        slice_height=int(slice_h),
+        slice_width=int(slice_w),
+        overlap_height_ratio=float(overlap_h),
+        overlap_width_ratio=float(overlap_w),
+        postprocess_class_agnostic=False,
+        verbose=0,
+    )
+    det_vis_rgb, det_counts_text = _draw_boxes_rgb(image_rgb, sahi_result, target_class)
+    # =========================
+    # Modello B: YOLO Segmentation nativo (no SAHI)
+    # =========================
+    try:
+        seg_model = YOLO(weights_seg_path)
+        # Nota: Ultralytics gestisce internamente il device; possiamo passarlo qui
+        # Se chosen_device è 'cpu' o 'cuda:0'
+        # Alcune versioni usano 'device' in predict(), altre in load/attr; .predict supporta device
+        seg_results = seg_model.predict(
+            source=image_rgb,
+            conf=conf_seg,
+            device=chosen_device,
+            verbose=False,
+        )
+        # Prendi il primo risultato
+        r0 = seg_results[0] if isinstance(seg_results, (list, tuple)) else seg_results
+    except Exception as e:
+        raise gr.Error(f"Errore durante l'inferenza del modello di segmentazione: {e}")
+    seg_vis_rgb, seg_counts_text = _draw_segmentation_masks_rgb(image_rgb, r0, target_class)
+    return det_vis_rgb, det_counts_text, seg_vis_rgb, seg_counts_text
+def build_app():
+    with gr.Blocks(title="Berries counting and bunches segmentation - Owl-Nest") as demo:
+        gr.Markdown(
+            "- Carica un'immagine e lancia l'inferenza con due modelli YOLO.\n"
+            "- Modello A dedicato al rilevamento e conteggio di acini.\n"
+            "- Modello B dedicato alla segmentazione di grappoli."
+        )
+        with gr.Row():
+            with gr.Column():
+                img_in = gr.Image(label="Immagine", type="numpy")
+                gr.Markdown("### Pesi modelli")
+                weights_det = gr.Textbox(
+                    label="Percorso pesi Modello A",
+                    value="weights/berry.pt",
+                    placeholder="es. weights/best.pt",
+                )
+                weights_seg = gr.Textbox(
+                    label="Percorso pesi Modello B",
+                    value="weights/bunch.pt",
+                    placeholder="es. weights/seg.pt",
+                )
+                target = gr.Textbox(label="Classe target", value="berry")
+                gr.Markdown("### Parametri modello A")
+                with gr.Row():
+                    conf_det = gr.Slider(0.0, 1.0, value=0.35, step=0.01, label="Confidence (A)")
+                    device = gr.Dropdown(
+                        ["auto", "cuda:0", "cpu"],
+                        value="auto",
+                        label="Device",
+                    )
+                with gr.Row():
+                    slice_h = gr.Slider(64, 2048, value=640, step=32, label="Slice H (A)")
+                    slice_w = gr.Slider(64, 2048, value=640, step=32, label="Slice W (A)")
+                with gr.Row():
+                    overlap_h = gr.Slider(0.0, 0.9, value=0.10, step=0.01, label="Overlap H ratio (A)")
+                    overlap_w = gr.Slider(0.0, 0.9, value=0.10, step=0.01, label="Overlap W ratio (A)")
+                gr.Markdown("### Parametri modello B")
+                conf_seg = gr.Slider(0.0, 1.0, value=0.35, step=0.01, label="Confidence (B)")
+                run_btn = gr.Button("Esegui inferenza", variant="primary")
+            with gr.Column():
+                gr.Markdown("### Risultato Modello A")
+                img_out_det = gr.Image(label="Detections (solo bbox)", type="numpy")
+                counts_out_det = gr.Textbox(label="Conteggi (A)", interactive=False)
+                gr.Markdown("### Risultato Modello B")
+                img_out_seg = gr.Image(label="Segmentazione (maschere)", type="numpy")
+                counts_out_seg = gr.Textbox(label="Conteggi (B)", interactive=False)
+        run_btn.click(
+            infer_two_models,
+            inputs=[
+                img_in,
+                weights_det, conf_det,
+                slice_h, slice_w, overlap_h, overlap_w,
+                device,
+                target,
+                weights_seg, conf_seg
+            ],
+            outputs=[img_out_det, counts_out_det, img_out_seg, counts_out_seg],
+        )
+    return demo
+# Script entry point: build the UI and start the Gradio server.
+if __name__ == "__main__":
+    demo = build_app()
+    # On Spaces there is no need to specify server_name or share
+    demo.launch()