Spaces:

Ronny56
/

yolo

Sleeping

File size: 8,345 Bytes

2a569bb
cddfd6c
2a569bb
 
 
4470ab3
 
958a84c
cddfd6c
958a84c
 
cddfd6c
 
 
958a84c
cddfd6c
958a84c
 
cddfd6c
958a84c
 
cddfd6c
958a84c
 
cddfd6c
 
 
 
 
 
 
958a84c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cddfd6c
958a84c
 
 
cddfd6c
958a84c
 
 
cddfd6c
958a84c
cddfd6c
 
958a84c
a38cac9
958a84c
 
cddfd6c
 
958a84c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a38cac9
958a84c
 
 
 
a38cac9
 
 
958a84c
 
 
 
 
 
 
 
 
 
 
 
cddfd6c
958a84c
2a569bb
 
 
a38cac9
2a569bb
958a84c
4470ab3
2a569bb
958a84c
2a569bb
4470ab3
 
 
 
 
a38cac9
4470ab3
 
958a84c
4470ab3
 
 
 
 
958a84c
a38cac9
cddfd6c
 
a38cac9
cddfd6c
 
 
 
 
 
 
4470ab3
a38cac9
958a84c
2a569bb
958a84c
 
a38cac9
958a84c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a38cac9
 
958a84c
 
a38cac9
 
 
 
958a84c
a38cac9
cddfd6c
 
2a569bb
958a84c
2a569bb
 
958a84c
 
 
2a569bb
 
 
958a84c
a38cac9
958a84c
2a569bb
 
cddfd6c
958a84c
2a569bb
 
958a84c
 
 
 
 
 
 
 
 
 
2a569bb
 
958a84c
 
a38cac9
2a569bb

import gradio as gr
from ultralytics import YOLO
import numpy as np
import cv2

# Load the YOLO weights from 'best.pt' once, at import time; the resulting
# model object is reused by every call to detect_and_segment_balloons.
# NOTE(review): presumably a model trained to detect comic speech balloons -
# confirm against the weights file.
detector = YOLO('best.pt')

def get_inpaint_bboxes(xyxy, img):
    """
    Locate the text areas inside one balloon (approach inspired by comic-translate).

    The balloon crop is contrast-enhanced, binarised with an adaptive
    threshold, cleaned up with morphological closing/dilation and split into
    external contours. Contours passing the size and aspect-ratio filters
    are kept as text regions.

    Args:
        xyxy: Four balloon coordinates (x1, y1, x2, y2). Each one is
            truncated with ``int`` and clamped to the image bounds.
        img: Image array, 2-D or 3-D. A 3-D array is converted to grayscale
            with ``cv2.COLOR_RGB2GRAY``.

    Returns:
        ``None`` when the clamped box is empty or when no contour passes the
        filters. Otherwise a tuple ``(full_mask, text_bboxes)`` where
        ``full_mask`` is a uint8 array with the image's height and width
        (255 on text pixels, 0 elsewhere) and ``text_bboxes`` is a list of
        dicts with keys ``x1``, ``y1``, ``x2``, ``y2`` (image coordinates)
        and ``area`` (contour area).
    """
    left, top, right, bottom = map(int, xyxy)

    # Clamp the box to the image so the crop below never leaves the frame.
    img_h, img_w = img.shape[:2]
    left = max(0, left)
    top = max(0, top)
    right = min(img_w, right)
    bottom = min(img_h, bottom)
    if right <= left or bottom <= top:
        return None

    crop = img[top:bottom, left:right].copy()
    crop_h, crop_w = crop.shape[:2]

    # Work on a single-channel image.
    gray = cv2.cvtColor(crop, cv2.COLOR_RGB2GRAY) if len(crop.shape) == 3 else crop

    # Step 1: adaptive histogram equalisation (CLAHE) to boost contrast.
    enhanced = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(gray)

    # Step 2: adaptive threshold, which copes with uneven backgrounds.
    # THRESH_BINARY_INV turns dark strokes into white (255) pixels.
    binary = cv2.adaptiveThreshold(
        enhanced,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        blockSize=15,  # size of the local neighbourhood
        C=10,  # constant subtracted from the local mean
    )

    # Step 3: close small gaps, first with a vertical kernel (joins parts of
    # a character), then with a horizontal one (joins neighbouring characters).
    for kernel_shape in ((2, 1), (1, 3)):
        binary = cv2.morphologyEx(
            binary, cv2.MORPH_CLOSE, np.ones(kernel_shape, np.uint8)
        )

    # A light dilation merges the characters of a word into one blob.
    square3 = np.ones((3, 3), np.uint8)
    merged = cv2.dilate(binary, square3, iterations=1)

    # Step 4: outer contours of the candidate text blobs.
    contours, _ = cv2.findContours(merged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Step 5: keep only plausible text blobs and record their boxes.
    region_mask = np.zeros((crop_h, crop_w), dtype=np.uint8)
    text_bboxes = []
    for contour in contours:
        area = cv2.contourArea(contour)
        bx, by, bw, bh = cv2.boundingRect(contour)
        ratio = bw / float(bh) if bh > 0 else 0

        too_small = area <= 50 or bw <= 3 or bh <= 3
        too_large = bw >= crop_w * 0.95 or bh >= crop_h * 0.95
        odd_shape = ratio <= 0.1 or ratio >= 15
        if too_small or too_large or odd_shape:
            continue

        # Paint the filled contour into the crop-sized mask.
        cv2.drawContours(region_mask, [contour], -1, 255, -1)

        # Store the bounding box in full-image coordinates.
        text_bboxes.append({
            'x1': left + bx,
            'y1': top + by,
            'x2': left + bx + bw,
            'y2': top + by + bh,
            'area': area
        })

    if not text_bboxes:
        return None

    # Step 6: grow the mask slightly to cover anti-aliased text edges.
    region_mask = cv2.dilate(region_mask, square3, iterations=1)

    # Paste the crop-sized mask into a mask covering the whole image.
    full_mask = np.zeros((img_h, img_w), dtype=np.uint8)
    full_mask[top:bottom, left:right] = region_mask

    return full_mask, text_bboxes

def _to_detector_input(image):
    """Return `image` in the channel order the YOLO detector expects."""
    # Ultralytics interprets NumPy inputs as BGR, while this file treats
    # `image` as RGB (see COLOR_RGB2GRAY in get_inpaint_bboxes). Only the
    # detector gets the swapped copy; everything else keeps using RGB.
    if isinstance(image, np.ndarray) and image.ndim == 3 and image.shape[2] == 3:
        return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image


def _draw_text_regions(annotated, mask, text_bboxes, box, balloon_id):
    """Draw mask outlines, per-region boxes and the balloon label onto `annotated` in place."""
    # Green outline of the text mask.
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(annotated, contours, -1, (0, 255, 0), 2)

    # Thin orange rectangle around every individual text region.
    for tb in text_bboxes:
        cv2.rectangle(annotated,
                      (tb['x1'], tb['y1']),
                      (tb['x2'], tb['y2']),
                      (255, 165, 0), 1)

    # Balloon label: normally just above the box; for balloons touching the
    # top edge it is moved inside the box so it is not clipped away.
    label_x, box_top = int(box[0]), int(box[1])
    label_y = box_top - 10
    if label_y < 12:
        label_y = max(box_top, 0) + 15
    cv2.putText(annotated, f"B{balloon_id} ({len(text_bboxes)} txt)",
                (label_x, label_y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)


def detect_and_segment_balloons(image, confidence):
    """Detect balloons with the YOLO model and segment the text inside each one.

    Args:
        image: Image array handled as RGB by this file (it comes from the
            Gradio image input), or ``None`` when nothing was provided.
        confidence: Detection threshold forwarded to the detector as ``conf``.

    Returns:
        A tuple ``(annotated, output)``:

        * ``image is None``: ``(None, {"error": "Nessuna immagine"})``.
        * no detections: the untouched input image and
          ``{'num_balloons': 0, 'detections': []}``.
        * otherwise: a copy of the image with the text outlines, region
          boxes and balloon labels drawn on it, and a dict with
          ``num_balloons`` and one ``detections`` entry per balloon
          (``balloon_id``, ``balloon_box``, ``confidence``, ``text_regions``,
          ``has_segmentation``, plus ``num_text_regions`` on success or
          ``error`` when segmentation raised).
    """
    if image is None:
        return None, {"error": "Nessuna immagine"}

    print(f"Rilevamento con confidenza: {confidence}")

    # Detection
    detection_results = detector(_to_detector_input(image), conf=confidence, verbose=False)
    boxes = detection_results[0].boxes

    output = {'num_balloons': 0, 'detections': []}

    if boxes is None or len(boxes) == 0:
        print("Nessun balloon rilevato")
        return image, output

    output['num_balloons'] = len(boxes)
    print(f"Trovati {output['num_balloons']} balloon")

    annotated = image.copy()

    # Move the tensors to host memory once instead of once per balloon.
    all_xyxy = boxes.xyxy.cpu().numpy()
    all_conf = boxes.conf.cpu().numpy()

    for balloon_id, (box, score) in enumerate(zip(all_xyxy, all_conf), start=1):
        conf = float(score)

        print(f"Segmentazione balloon {balloon_id}...")

        detection_data = {
            'balloon_id': balloon_id,
            'balloon_box': {
                'x1': int(box[0]),
                'y1': int(box[1]),
                'x2': int(box[2]),
                'y2': int(box[3])
            },
            'confidence': round(conf, 3)
        }

        try:
            result = get_inpaint_bboxes(box, image)
            mask, text_bboxes = result if result is not None else (None, [])
            if text_bboxes:
                _draw_text_regions(annotated, mask, text_bboxes, box, balloon_id)
        except Exception as e:
            # Per-balloon boundary: one failing balloon must not abort the
            # whole page, so the error is reported in the output instead.
            print(f"  ❌ Errore: {str(e)}")
            detection_data['text_regions'] = []
            detection_data['has_segmentation'] = False
            detection_data['error'] = str(e)
        else:
            detection_data['text_regions'] = text_bboxes
            if text_bboxes:
                detection_data['num_text_regions'] = len(text_bboxes)
                print(f"  ✅ {len(text_bboxes)} aree di testo segmentate")
            elif result is None:
                print("  ⚠️ Segmentazione fallita")
            else:
                print("  ⚠️ Nessun testo trovato")
            detection_data['has_segmentation'] = bool(text_bboxes)

        output['detections'].append(detection_data)

    print("Completato!")
    return annotated, output

# Gradio UI: the left column collects the page image and the detection
# threshold, the right column shows the annotated image and the JSON report.
with gr.Blocks() as demo:
    gr.Markdown("# 🎈 Segmentazione Testo Balloons (Algorithm Comic-Translate)")
    gr.Markdown("**Usa threshold adattivo e morphological operations per segmentare testo preciso**")

    with gr.Row():
        with gr.Column():
            # type="numpy" hands the callback an array rather than a file path.
            input_image = gr.Image(type="numpy", label="📷 Fumetto")
            # Explicit keywords: range 0.1-1.0, default 0.25, step 0.05.
            # `step` in particular is passed by name rather than by position,
            # so the call does not depend on the positional parameter order.
            confidence = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.25,
                step=0.05,
                label="🎯 Confidenza",
            )
            segment_btn = gr.Button("✂️ Segmenta", variant="primary")

        with gr.Column():
            output_image = gr.Image(label="✅ Risultato")
            output_json = gr.JSON(label="📊 Dati")

    gr.Markdown("""
    ### 🔧 Algoritmo:
    1. **CLAHE**: Equalizzazione istogramma adattiva
    2. **Threshold Adattivo**: Funziona su sfondi variabili
    3. **Morphological Ops**: Connette caratteri e rimuove rumore
    4. **Filtraggio**: Area, aspect ratio, dimensioni
    
    ### 📖 Output:
    - **Verde**: Contorni mask del testo
    - **Arancione**: Bounding box individuali
    - **text_regions**: Coordinate per OCR/inpainting
    """)

    # Run detection + segmentation when the button is pressed; the two
    # return values map onto the image and JSON outputs, in that order.
    segment_btn.click(
        fn=detect_and_segment_balloons,
        inputs=[input_image, confidence],
        outputs=[output_image, output_json]
    )

# Start the web server as soon as the file is executed.
demo.launch()