Spaces:

Ronny56
/

yolo

Sleeping

App Files Files Community

Ronny56 committed on Oct 29, 2025

Commit

cddfd6c

verified ·

1 Parent(s): 3cbb007

Update app.py

Browse files

Files changed (1) hide show

app.py +160 -101

app.py CHANGED Viewed

@@ -1,21 +1,84 @@
 import gradio as gr
-from ultralytics import YOLO, SAM
 import numpy as np
 import cv2
-# Carica modelli
-print("Caricamento modelli...")
 detector = YOLO('best.pt')
-segmenter = SAM('mobile_sam.pt')
-print("Modelli caricati!")
-def detect_and_segment_balloons(image, confidence):
     if image is None:
         return None, {"error": "Nessuna immagine"}
     print(f"1. Rilevamento balloon con confidenza: {confidence}")
-    # Step 1: Rileva balloon con YOLO
     detection_results = detector(image, conf=confidence, verbose=False)
     output = {
@@ -34,99 +97,89 @@ def detect_and_segment_balloons(image, confidence):
     annotated = image.copy()
     h, w = image.shape[:2]
-    # Step 2: Per ogni balloon, usa SAM per segmentazione precisa
     for i in range(len(detection_results[0].boxes)):
         box = detection_results[0].boxes.xyxy[i].cpu().numpy()
         conf = float(detection_results[0].boxes.conf[i].cpu().numpy())
-        print(f"3. Segmentazione balloon {i+1}...")
-        try:
-            # Usa SAM con il bounding box come prompt
-            seg_results = segmenter(image, bboxes=[box], verbose=False)
-            detection_data = {
-                'balloon_id': i + 1,
-                'box': {
-                    'x1': int(box[0]),
-                    'y1': int(box[1]),
-                    'x2': int(box[2]),
-                    'y2': int(box[3])
-                },
-                'confidence': round(conf, 3)
-            }
-            # Estrai la mask precisa da SAM
-            if seg_results[0].masks is not None and len(seg_results[0].masks.data) > 0:
-                # Ottieni mask e converti tipo PRIMA di ridimensionare
-                mask = seg_results[0].masks.data[0].cpu().numpy()
-                mask = mask.astype(np.float32)  # ✅ Converti a float32
-                # Ridimensiona alle dimensioni originali
-                mask_resized = cv2.resize(mask, (w, h), interpolation=cv2.INTER_LINEAR)
-                # Converti in mask binaria (0/255)
-                mask_binary = (mask_resized > 0.5).astype(np.uint8)
-                mask_uint8 = mask_binary * 255
-                # Trova contorno del balloon
-                contours, _ = cv2.findContours(mask_binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-                if len(contours) > 0:
-                    # Prendi il contorno più grande
-                    largest_contour = max(contours, key=cv2.contourArea)
-                    # Converti contorno in lista di punti
-                    polygon = largest_contour.reshape(-1, 2).tolist()
-                    detection_data['polygon'] = polygon
-                    detection_data['num_points'] = len(polygon)
-                    # ✅ MASK COME BACCHETTA MAGICA (0/255)
-                    detection_data['mask'] = mask_uint8.tolist()
-                    detection_data['mask_shape'] = [h, w]
-                    # Disegna contorno verde sull'immagine
-                    cv2.drawContours(annotated, [largest_contour], -1, (0, 255, 0), 2)
-                    # Aggiungi etichetta
-                    x, y = int(box[0]), int(box[1]) - 10
-                    cv2.putText(annotated, f"B{i+1}", (x, y),
-                               cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
-                    detection_data['has_mask'] = True
-                    print(f"   ✅ Balloon {i+1} segmentato: {len(polygon)} punti")
                 else:
                     detection_data['has_mask'] = False
-                    print(f"   ⚠️ Balloon {i+1}: nessun contorno trovato")
-            else:
                 detection_data['has_mask'] = False
-                print(f"   ⚠️ Balloon {i+1}: SAM non ha prodotto mask")
-            output['detections'].append(detection_data)
-        except Exception as e:
-            print(f"   ❌ Errore segmentazione balloon {i+1}: {str(e)}")
-            # Aggiungi comunque detection senza mask
-            output['detections'].append({
-                'balloon_id': i + 1,
-                'box': {
-                    'x1': int(box[0]),
-                    'y1': int(box[1]),
-                    'x2': int(box[2]),
-                    'y2': int(box[3])
-                },
-                'confidence': round(conf, 3),
-                'has_mask': False,
-                'error': str(e)
-            })
     print(f"4. Completato! {output['num_balloons']} balloon processati")
     return annotated, output
 # Interface Gradio
-with gr.Blocks(title="Balloon Segmentation") as demo:
-    gr.Markdown("# 🎈 Segmentazione Precisa Balloon")
-    gr.Markdown("**Rileva balloon e crea mask precise pixel-per-pixel (come bacchetta magica)**")
     with gr.Row():
         with gr.Column():
@@ -138,36 +191,42 @@ with gr.Blocks(title="Balloon Segmentation") as demo:
                 step=0.05,
                 label="🎯 Confidenza Detection"
             )
-            segment_btn = gr.Button("✂️ Segmenta Balloon", variant="primary", size="lg")
         with gr.Column():
-            output_image = gr.Image(label="✅ Balloon Segmentati (contorni verdi)")
-            output_json = gr.JSON(label="📊 Dati con Mask Precise")
     gr.Markdown("""
     ### 📖 Formato Output per Ogni Balloon:
-    - **mask**: Array 2D (0/255) - **USA QUESTO come selezione!**
-    - **polygon**: Lista punti [x, y] del contorno
-    - **box**: Rettangolo {x1, y1, x2, y2}
     - **mask_shape**: [altezza, larghezza] della mask
-    ### 🎯 Come Usare nella Tua App React:
-    La **mask** è identica alla selezione della bacchetta magica:
-    - Pixel **255** = dentro il balloon (selezionato - bianco)
-    - Pixel **0** = fuori dal balloon (non selezionato - nero)
-    ### ⚙️ Note Tecniche:
-    - **Detection**: YOLO trova i balloon (veloce)
-    - **Segmentation**: SAM crea mask precise (più lento ma accurato)
-    - Su CPU può richiedere 10-20 secondi per immagine
     """)
-    segment_btn.click(
         fn=detect_and_segment_balloons,
-        inputs=[input_image, confidence],
         outputs=[output_image, output_json]
     )
-print("✅ App di segmentazione avviata!")
 demo.launch()

 import gradio as gr
+from ultralytics import YOLO
 import numpy as np
 import cv2
+# Carica il modello detection
+print("Caricamento modello...")
 detector = YOLO('best.pt')
+print("Modello caricato!")
def get_inpaint_bboxes(xyxy, img):
    """
    Build a full-image mask of the text found inside one detected box.

    Simplified version of get_inpaint_bboxes from comic-translate: the box
    is cropped from the image, binarized with Otsu's threshold, dilated,
    and every sufficiently large external contour is filled.

    Args:
        xyxy: Box corners (x1, y1, x2, y2) in pixel coordinates. Each value
            is truncated with int().
        img: Image array indexed as img[y, x], either 2-D or with a
            trailing channel axis. The grayscale conversion uses
            cv2.COLOR_RGB2GRAY, so RGB channel order is assumed.

    Returns:
        A uint8 array of shape (h, w) that is 0 everywhere except the
        filled contours inside the box (255), or None when the box has no
        area inside the image.
    """
    h, w = img.shape[:2]
    x1, y1, x2, y2 = (int(coord) for coord in xyxy)

    # Clamp every coordinate to BOTH image bounds. Clamping only one side
    # lets a negative x2/y2 through, and NumPy would then read it as a
    # from-the-end index and crop an unrelated region of the image.
    x1 = min(max(x1, 0), w)
    y1 = min(max(y1, 0), h)
    x2 = min(max(x2, 0), w)
    y2 = min(max(y2, 0), h)

    # Nothing to segment when the box is empty or entirely off-image.
    if x2 <= x1 or y2 <= y1:
        return None

    balloon_region = img[y1:y2, x1:x2]

    # Work on a single-channel image.
    if balloon_region.ndim == 3:
        gray = cv2.cvtColor(balloon_region, cv2.COLOR_RGB2GRAY)
    else:
        gray = balloon_region

    # Otsu threshold: bright pixels become 255, dark pixels become 0.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # We want text = white, background = black. When the crop is mostly
    # bright (light balloon, dark text) the threshold output is inverted.
    white_pixels = np.count_nonzero(binary == 255)
    black_pixels = np.count_nonzero(binary == 0)
    if white_pixels > black_pixels:
        binary = cv2.bitwise_not(binary)

    # Dilate slightly so that neighbouring characters merge into one blob.
    kernel = np.ones((3, 3), np.uint8)
    dilated = cv2.dilate(binary, kernel, iterations=2)

    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Fill every contour large enough not to be noise (area > 20 px).
    mask = np.zeros_like(gray)
    for contour in contours:
        if cv2.contourArea(contour) > 20:
            cv2.drawContours(mask, [contour], -1, 255, -1)

    # Paste the crop-sized mask back at the box position in a full-size mask.
    full_mask = np.zeros((h, w), dtype=np.uint8)
    full_mask[y1:y2, x1:x2] = mask
    return full_mask
+def detect_and_segment_balloons(image, confidence, use_segmentation):
+    """
+    Rileva balloon e opzionalmente crea mask di segmentazione.
+    """
     if image is None:
         return None, {"error": "Nessuna immagine"}
     print(f"1. Rilevamento balloon con confidenza: {confidence}")
+    # Step 1: Detection con YOLO
     detection_results = detector(image, conf=confidence, verbose=False)
     output = {
     annotated = image.copy()
     h, w = image.shape[:2]
+    # Step 2: Per ogni balloon, crea mask di segmentazione
     for i in range(len(detection_results[0].boxes)):
         box = detection_results[0].boxes.xyxy[i].cpu().numpy()
         conf = float(detection_results[0].boxes.conf[i].cpu().numpy())
+        detection_data = {
+            'balloon_id': i + 1,
+            'box': {
+                'x1': int(box[0]),
+                'y1': int(box[1]),
+                'x2': int(box[2]),
+                'y2': int(box[3])
+            },
+            'confidence': round(conf, 3)
+        }
+        if use_segmentation:
+            print(f"3. Segmentazione balloon {i+1}...")
+            try:
+                # Usa get_inpaint_bboxes per creare la mask
+                mask = get_inpaint_bboxes(box, image)
+                if mask is not None and mask.any():
+                    # Trova contorno del balloon dalla mask
+                    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+                    if len(contours) > 0:
+                        # Prendi tutti i contorni (potrebbero essere più righe di testo)
+                        all_polygons = []
+                        for contour in contours:
+                            if cv2.contourArea(contour) > 20:  # Filtra rumore
+                                polygon = contour.reshape(-1, 2).tolist()
+                                all_polygons.append(polygon)
+                        detection_data['polygons'] = all_polygons
+                        detection_data['num_polygons'] = len(all_polygons)
+                        # ✅ MASK COME BACCHETTA MAGICA (0/255)
+                        detection_data['mask'] = mask.tolist()
+                        detection_data['mask_shape'] = [h, w]
+                        # Disegna contorni sull'immagine
+                        cv2.drawContours(annotated, contours, -1, (0, 255, 0), 2)
+                        # Aggiungi etichetta
+                        x, y = int(box[0]), int(box[1]) - 10
+                        cv2.putText(annotated, f"B{i+1}", (x, y),
+                                   cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
+                        detection_data['has_mask'] = True
+                        print(f"   ✅ Balloon {i+1} segmentato: {len(all_polygons)} poligoni")
+                    else:
+                        detection_data['has_mask'] = False
+                        print(f"   ⚠️ Balloon {i+1}: nessun contorno trovato")
                 else:
                     detection_data['has_mask'] = False
+                    print(f"   ⚠️ Balloon {i+1}: mask vuota")
+            except Exception as e:
+                print(f"   ❌ Errore segmentazione balloon {i+1}: {str(e)}")
                 detection_data['has_mask'] = False
+                detection_data['error'] = str(e)
+        else:
+            # Solo detection, niente segmentation
+            detection_data['has_mask'] = False
+            # Disegna solo bounding box
+            cv2.rectangle(annotated,
+                         (int(box[0]), int(box[1])),
+                         (int(box[2]), int(box[3])),
+                         (255, 0, 0), 2)
+            cv2.putText(annotated, f"B{i+1}", (int(box[0]), int(box[1]) - 10),
+                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
+        output['detections'].append(detection_data)
     print(f"4. Completato! {output['num_balloons']} balloon processati")
     return annotated, output
 # Interface Gradio
+with gr.Blocks(title="Balloon Detection & Segmentation") as demo:
+    gr.Markdown("# 🎈 Rilevamento e Segmentazione Balloon (stile comic-translate)")
+    gr.Markdown("**Sistema leggero senza SAM - usa image processing per creare mask precise**")
     with gr.Row():
         with gr.Column():
                 step=0.05,
                 label="🎯 Confidenza Detection"
             )
+            use_segmentation = gr.Checkbox(
+                value=True,
+                label="✂️ Abilita Segmentazione (crea mask precise)"
+            )
+            process_btn = gr.Button("🔍 Processa", variant="primary", size="lg")
         with gr.Column():
+            output_image = gr.Image(label="✅ Risultato")
+            output_json = gr.JSON(label="📊 Dati Output")
     gr.Markdown("""
     ### 📖 Formato Output per Ogni Balloon:
+    - **box**: Rettangolo di detection {x1, y1, x2, y2}
+    - **mask**: Array 2D (0/255) - **mask precisa del testo** (come bacchetta magica!)
+    - **polygons**: Lista di poligoni che descrivono le aree di testo
     - **mask_shape**: [altezza, larghezza] della mask
+    ### 🎯 Come Funziona:
+    1. **Detection**: YOLO trova i balloon (veloce, su CPU)
+    2. **Segmentation**: Image processing crea mask del testo (velocissimo!)
+    3. Nessun modello pesante come SAM = **molto più veloce**
+    ### 💡 Differenza con SAM:
+    - ✅ **Molto più veloce** (< 1 secondo vs 15-20 secondi)
+    - ✅ **Funziona bene su CPU** gratuita
+    - ⚠️ Segmenta il **testo** dentro il balloon, non il contorno del balloon
+    - Perfetto per **inpainting** (rimuovere testo)
     """)
+    process_btn.click(
         fn=detect_and_segment_balloons,
+        inputs=[input_image, confidence, use_segmentation],
         outputs=[output_image, output_json]
     )
+print("✅ App avviata!")
 demo.launch()