File size: 8,345 Bytes
2a569bb
cddfd6c
2a569bb
 
 
4470ab3
 
958a84c
cddfd6c
958a84c
 
cddfd6c
 
 
958a84c
cddfd6c
958a84c
 
cddfd6c
958a84c
 
cddfd6c
958a84c
 
cddfd6c
 
 
 
 
 
 
958a84c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cddfd6c
958a84c
 
 
cddfd6c
958a84c
 
 
cddfd6c
958a84c
cddfd6c
 
958a84c
a38cac9
958a84c
 
cddfd6c
 
958a84c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a38cac9
958a84c
 
 
 
a38cac9
 
 
958a84c
 
 
 
 
 
 
 
 
 
 
 
cddfd6c
958a84c
2a569bb
 
 
a38cac9
2a569bb
958a84c
4470ab3
2a569bb
958a84c
2a569bb
4470ab3
 
 
 
 
a38cac9
4470ab3
 
958a84c
4470ab3
 
 
 
 
958a84c
a38cac9
cddfd6c
 
a38cac9
cddfd6c
 
 
 
 
 
 
4470ab3
a38cac9
958a84c
2a569bb
958a84c
 
a38cac9
958a84c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a38cac9
 
958a84c
 
a38cac9
 
 
 
958a84c
a38cac9
cddfd6c
 
2a569bb
958a84c
2a569bb
 
958a84c
 
 
2a569bb
 
 
958a84c
a38cac9
958a84c
2a569bb
 
cddfd6c
958a84c
2a569bb
 
958a84c
 
 
 
 
 
 
 
 
 
2a569bb
 
958a84c
 
a38cac9
2a569bb
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
import gradio as gr
from ultralytics import YOLO
import numpy as np
import cv2

# Load the YOLO weights from 'best.pt' once at import time; this single model
# instance is the one called by detect_and_segment_balloons() for every request.
detector = YOLO('best.pt')

def get_inpaint_bboxes(xyxy, img):
    """
    Find text areas inside one balloon box (approach inspired by comic-translate).

    The box is clipped to the image, the crop is contrast-enhanced with CLAHE,
    binarised with an inverted adaptive threshold, cleaned up with small
    morphological operations, and the surviving contours are filtered by
    area, size and aspect ratio.

    Args:
        xyxy: Four values ``(x1, y1, x2, y2)``; each is truncated with ``int``.
        img: Image array indexed ``[row, col]`` or ``[row, col, channel]``.
            A 3-D array is converted with ``COLOR_RGB2GRAY`` (treated as RGB).

    Returns:
        ``None`` when the clipped box is empty or no contour passes the
        filters. Otherwise a tuple ``(full_mask, text_bboxes)``:
        ``full_mask`` is a ``uint8`` array with the image's height and width,
        255 on the detected text pixels; ``text_bboxes`` is a list of dicts
        with keys ``x1``, ``y1``, ``x2``, ``y2`` (absolute image coordinates)
        and ``area`` (contour area).
    """
    left, top, right, bottom = (int(value) for value in xyxy)

    # Clip the box to the image bounds.
    img_h, img_w = img.shape[:2]
    left = max(0, left)
    top = max(0, top)
    right = min(img_w, right)
    bottom = min(img_h, bottom)

    if right <= left or bottom <= top:
        return None

    crop = img[top:bottom, left:right].copy()
    crop_h, crop_w = crop.shape[:2]

    # Work on a single-channel image.
    gray = cv2.cvtColor(crop, cv2.COLOR_RGB2GRAY) if crop.ndim == 3 else crop

    # Step 1: local histogram equalisation to boost contrast.
    enhanced = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(gray)

    # Step 2: adaptive threshold, inverted so text becomes white on black.
    binary = cv2.adaptiveThreshold(
        enhanced,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        blockSize=15,  # size of the local neighbourhood
        C=10,  # constant subtracted from the local mean
    )

    # Step 3: close small gaps, first with a vertical (2x1) kernel to join
    # parts of a glyph, then with a horizontal (1x3) kernel to join
    # neighbouring glyphs.
    for kernel_shape in ((2, 1), (1, 3)):
        closing_kernel = np.ones(kernel_shape, np.uint8)
        binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, closing_kernel)

    # A light 3x3 dilation merges the characters of a word.
    square_kernel = np.ones((3, 3), np.uint8)
    dilated = cv2.dilate(binary, square_kernel, iterations=1)

    # Step 4: contours of the candidate text areas.
    # NOTE(review): RETR_EXTERNAL keeps only outermost contours; if the balloon
    # outline forms a closed shape inside the crop, glyphs nested in it would
    # not be returned -- confirm this is intended.
    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Step 5: filter the contours and collect mask + bounding boxes.
    text_bboxes = []
    mask = np.zeros((crop_h, crop_w), dtype=np.uint8)
    width_limit = crop_w * 0.95
    height_limit = crop_h * 0.95

    for contour in contours:
        area = cv2.contourArea(contour)
        bx, by, bw, bh = cv2.boundingRect(contour)
        aspect_ratio = bw / float(bh) if bh > 0 else 0

        if area <= 50:
            continue  # too small to be text
        if bw <= 3 or bh <= 3:
            continue  # below the minimum side length
        if bw >= width_limit or bh >= height_limit:
            continue  # spans (almost) the whole crop
        if not (0.1 < aspect_ratio < 15):
            continue  # implausible aspect ratio

        # Filled contour goes into the crop-sized mask.
        cv2.drawContours(mask, [contour], -1, 255, -1)

        # Bounding box translated back to absolute image coordinates.
        text_bboxes.append({
            'x1': left + bx,
            'y1': top + by,
            'x2': left + bx + bw,
            'y2': top + by + bh,
            'area': area
        })

    if not text_bboxes:
        return None

    # Step 6: grow the mask slightly to cover anti-aliased glyph edges.
    mask = cv2.dilate(mask, square_kernel, iterations=1)

    # Paste the crop-sized mask into a mask with the full image size.
    full_mask = np.zeros((img_h, img_w), dtype=np.uint8)
    full_mask[top:bottom, left:right] = mask

    return full_mask, text_bboxes

def _draw_text_regions(canvas, mask, text_bboxes, label, label_origin):
    """Draw the text-mask outline, per-region boxes and balloon label on ``canvas`` in place."""
    # Green outline of the text mask.
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(canvas, contours, -1, (0, 255, 0), 2)

    # Individual bounding boxes; (255, 165, 0) is orange when the canvas is RGB.
    for tb in text_bboxes:
        cv2.rectangle(canvas, (tb['x1'], tb['y1']), (tb['x2'], tb['y2']), (255, 165, 0), 1)

    cv2.putText(canvas, label, label_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)


def detect_and_segment_balloons(image, confidence):
    """
    Detect balloons in an image and segment the text inside each of them.

    Runs the module-level ``detector`` on ``image``, calls
    ``get_inpaint_bboxes`` for every detected box and draws the results on a
    copy of the input.

    Args:
        image: Image as a NumPy array, or ``None`` when no image was supplied.
        confidence: Threshold forwarded to the detector as ``conf``.

    Returns:
        A tuple ``(image_out, output)``:

        * ``(None, {"error": "Nessuna immagine"})`` when ``image`` is ``None``.
        * ``(image, output)`` with ``num_balloons == 0`` when nothing is detected.
        * otherwise ``(annotated, output)`` where ``output['detections']``
          holds one dict per balloon with ``balloon_id``, ``balloon_box``,
          ``confidence``, ``text_regions``, ``num_text_regions``,
          ``has_segmentation`` and, if segmentation raised, ``error``.
    """
    if image is None:
        return None, {"error": "Nessuna immagine"}

    print(f"Rilevamento con confidenza: {confidence}")

    boxes = detector(image, conf=confidence, verbose=False)[0].boxes
    output = {'num_balloons': 0, 'detections': []}

    if boxes is None or len(boxes) == 0:
        print("Nessun balloon rilevato")
        return image, output

    output['num_balloons'] = len(boxes)
    print(f"Trovati {output['num_balloons']} balloon")

    annotated = image.copy()

    # Convert the detector tensors once instead of once per balloon.
    all_xyxy = boxes.xyxy.cpu().numpy()
    all_conf = boxes.conf.cpu().numpy()

    for balloon_id, (box, conf) in enumerate(zip(all_xyxy, all_conf), start=1):
        print(f"Segmentazione balloon {balloon_id}...")

        box_x1, box_y1, box_x2, box_y2 = (int(v) for v in box[:4])

        # Every record starts in the "no segmentation" state so that all
        # entries expose the same keys regardless of the outcome.
        detection_data = {
            'balloon_id': balloon_id,
            'balloon_box': {'x1': box_x1, 'y1': box_y1, 'x2': box_x2, 'y2': box_y2},
            'confidence': round(float(conf), 3),
            'text_regions': [],
            'num_text_regions': 0,
            'has_segmentation': False,
        }

        # Best effort per balloon: a failure on one balloon is recorded in its
        # entry and must not abort the remaining ones.
        try:
            result = get_inpaint_bboxes(box, image)

            if result is None:
                print("  ⚠️ Segmentazione fallita")
            else:
                mask, text_bboxes = result

                if not text_bboxes:
                    print("  ⚠️ Nessun testo trovato")
                else:
                    # Keep the label baseline inside the image for balloons
                    # that touch the top edge (a negative y would hide it).
                    label_origin = (box_x1, max(box_y1 - 10, 15))
                    _draw_text_regions(
                        annotated,
                        mask,
                        text_bboxes,
                        f"B{balloon_id} ({len(text_bboxes)} txt)",
                        label_origin,
                    )

                    detection_data['text_regions'] = text_bboxes
                    detection_data['num_text_regions'] = len(text_bboxes)
                    detection_data['has_segmentation'] = True
                    print(f"  ✅ {len(text_bboxes)} aree di testo segmentate")

        except Exception as e:
            print(f"  ❌ Errore: {e}")
            # Reset every segmentation field so the entry stays consistent
            # even if the failure happened after some of them were filled.
            detection_data['text_regions'] = []
            detection_data['num_text_regions'] = 0
            detection_data['has_segmentation'] = False
            detection_data['error'] = str(e)

        output['detections'].append(detection_data)

    print("Completato!")
    return annotated, output

# Gradio UI: an image and a confidence slider go in; the annotated image and
# the JSON produced by detect_and_segment_balloons() come out.
with gr.Blocks() as demo:
    gr.Markdown("# 🎈 Segmentazione Testo Balloons (Algorithm Comic-Translate)")
    gr.Markdown("**Usa threshold adattivo e morphological operations per segmentare testo preciso**")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="numpy", label="📷 Fumetto")
            # Named arguments avoid relying on positional order; in current
            # Gradio releases `step` can only be passed by keyword.
            confidence = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.25,
                step=0.05,
                label="🎯 Confidenza",
            )
            segment_btn = gr.Button("✂️ Segmenta", variant="primary")

        with gr.Column():
            output_image = gr.Image(label="✅ Risultato")
            output_json = gr.JSON(label="📊 Dati")

    gr.Markdown("""
    ### 🔧 Algoritmo:
    1. **CLAHE**: Equalizzazione istogramma adattiva
    2. **Threshold Adattivo**: Funziona su sfondi variabili
    3. **Morphological Ops**: Connette caratteri e rimuove rumore
    4. **Filtraggio**: Area, aspect ratio, dimensioni
    
    ### 📖 Output:
    - **Verde**: Contorni mask del testo
    - **Arancione**: Bounding box individuali
    - **text_regions**: Coordinate per OCR/inpainting
    """)

    segment_btn.click(
        fn=detect_and_segment_balloons,
        inputs=[input_image, confidence],
        outputs=[output_image, output_json]
    )

# Start the server only when the file is executed as a script, so importing
# the module (e.g. to reuse `demo` or the functions above) has no side effect.
if __name__ == "__main__":
    demo.launch()