File size: 7,542 Bytes
2d1d7b4
e44cb9e
2d1d7b4
e44cb9e
af10e17
e44cb9e
db1b62c
af10e17
 
e44cb9e
2d1d7b4
db1b62c
e44cb9e
 
2d1d7b4
e44cb9e
 
db1b62c
e44cb9e
 
db1b62c
 
e44cb9e
db1b62c
 
e44cb9e
 
db1b62c
 
e44cb9e
db1b62c
 
 
 
 
 
e44cb9e
 
db1b62c
e44cb9e
db1b62c
e44cb9e
 
 
 
af10e17
db1b62c
e44cb9e
 
db1b62c
e44cb9e
db1b62c
e44cb9e
db1b62c
 
e44cb9e
db1b62c
e44cb9e
 
 
db1b62c
e44cb9e
db1b62c
 
e44cb9e
2d1d7b4
f70f4f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d1d7b4
f70f4f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e44cb9e
2d1d7b4
e44cb9e
2d1d7b4
e44cb9e
 
db1b62c
e44cb9e
 
 
 
 
 
 
db1b62c
e44cb9e
db1b62c
 
e44cb9e
db1b62c
 
e44cb9e
 
db1b62c
 
e44cb9e
db1b62c
 
e44cb9e
 
db1b62c
 
 
e44cb9e
db1b62c
e44cb9e
db1b62c
 
 
 
e44cb9e
db1b62c
 
 
 
 
 
af10e17
 
e44cb9e
2d1d7b4
db1b62c
2d1d7b4
e44cb9e
 
db1b62c
e44cb9e
 
 
 
 
 
 
db1b62c
e44cb9e
db1b62c
e44cb9e
db1b62c
e44cb9e
db1b62c
e44cb9e
db1b62c
e44cb9e
 
db1b62c
e44cb9e
 
 
 
db1b62c
e44cb9e
db1b62c
e44cb9e
 
 
db1b62c
e44cb9e
 
 
 
f70f4f3
db1b62c
 
 
e44cb9e
db1b62c
e44cb9e
 
db1b62c
 
e44cb9e
db1b62c
 
2a67dc1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
"""
Enhanced speech bubble detection for manga
"""
import cv2
import numpy as np
from shapely.geometry import Polygon
from shapely.ops import unary_union


def detect_speech_bubbles(img_pil, min_area=500, max_area=None, debug=False):
    """
    Basic speech bubble detection using adaptive threshold + morphology.

    Args:
        img_pil: Page image. Objects exposing ``convert`` (e.g. PIL images)
            are normalised to RGB first so grayscale / palette scans work;
            anything else is handed to ``np.array`` unchanged.
        min_area: Contours with a smaller area (in pixels) are discarded.
        max_area: Contours with a larger area are discarded. Defaults to a
            quarter of the page area.
        debug: Accepted for interface compatibility; not used here.

    Returns:
        List of bubble polygons [(x,y), ...]
    """
    # COLOR_RGB2BGR needs a colour array; single-channel or palette images
    # would otherwise make the conversion below fail.
    if hasattr(img_pil, "convert"):
        img_pil = img_pil.convert("RGB")

    img = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    h, w = gray.shape
    if max_area is None:
        max_area = (h * w) // 4  # bubbles should not be entire page

    th = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        35,
        10,
    )

    inv = 255 - th  # invert the thresholded image before morphology

    # Close first to bridge small gaps, then open to drop speckle noise.
    kernel_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    cleaned = cv2.morphologyEx(inv, cv2.MORPH_CLOSE, kernel_close, iterations=2)

    kernel_open = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_OPEN, kernel_open, iterations=1)

    contours, _ = cv2.findContours(cleaned, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    bubbles = []
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area < min_area or area > max_area:
            continue

        # Reject very elongated regions; the +1 keeps the ratio finite.
        _, _, bw, bh = cv2.boundingRect(cnt)
        aspect_ratio = max(bw, bh) / (min(bw, bh) + 1)
        if aspect_ratio > 5:
            continue

        perimeter = cv2.arcLength(cnt, True)
        if perimeter == 0:
            continue

        # Simplify the outline with a tolerance of 1% of its perimeter.
        approx = cv2.approxPolyDP(cnt, 0.01 * perimeter, True)
        bubbles.append([(int(p[0][0]), int(p[0][1])) for p in approx])

    print(f"🎈 detect_speech_bubbles: {len(bubbles)} candidates")
    return bubbles

def detect_bubbles_heuristic(img_pil, min_area=500, debug=False):
    """
    Detect speech bubbles as bright, compact regions that contain edges.

    Bright low-saturation pixels are selected with an HSV range mask,
    cleaned with morphology, and each external contour is kept only if it
    passes area, aspect-ratio and solidity filters and has enough Canny
    edge pixels inside it (used here as a cheap "contains text" signal).

    Args:
        img_pil: Page image. Objects exposing ``convert`` (e.g. PIL images)
            are normalised to RGB first so grayscale / palette scans work;
            anything else is handed to ``np.array`` unchanged.
        min_area: Contours with a smaller area (in pixels) are discarded.
        debug: When true, print a line for every region rejected by the
            edge-density filter.

    Returns:
        List of bubble polygons [(x,y), ...]
    """
    # COLOR_RGB2BGR needs a colour array; single-channel or palette images
    # would otherwise make the conversion below fail.
    if hasattr(img_pil, "convert"):
        img_pil = img_pil.convert("RGB")

    # 1. Convert to OpenCV format
    img = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
    h, w = img.shape[:2]
    max_area = h * w * 0.4  # a single region may cover at most 40% of the page

    # 2. HSV masking (bright, low-saturation regions)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lower_white = np.array([0, 0, 215])
    upper_white = np.array([180, 40, 255])
    mask = cv2.inRange(hsv, lower_white, upper_white)

    # Clean up mask: close with a large kernel, then open with a small one.
    kernel_close = np.ones((15, 15), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel_close)
    kernel_open = np.ones((5, 5), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel_open)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Pre-compute the edge map once; it is sampled per contour below.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 100, 200)

    bubbles = []
    for cnt in contours:
        area = cv2.contourArea(cnt)

        # --- Standard geometric filters ---
        if area < min_area or area > max_area:
            continue

        x, y, bw, bh = cv2.boundingRect(cnt)
        aspect_ratio = float(bw) / bh
        if aspect_ratio < 0.2 or aspect_ratio > 5.0:
            continue

        hull_area = cv2.contourArea(cv2.convexHull(cnt))
        if hull_area == 0:
            continue
        if float(area) / hull_area < 0.7:  # solidity: too ragged for a bubble
            continue

        # --- "Has text?" filter ---
        # Rasterise the contour inside its bounding box only (shifted by
        # -x, -y) instead of allocating a page-sized mask per contour; the
        # edge pixels counted are the same ones.
        roi_mask = np.zeros((bh, bw), dtype=np.uint8)
        cv2.drawContours(roi_mask, [cnt], -1, 255, thickness=-1, offset=(-x, -y))
        roi_edges = edges[y:y + bh, x:x + bw]
        edge_pixel_count = int(np.count_nonzero(roi_edges[roi_mask > 0]))

        # Density = edge pixels / contour area. Regions below 1.5% are
        # treated as plain white blobs without text.
        density = edge_pixel_count / area
        if density < 0.015:
            if debug:
                print(f"Skipping white blob (Empty): density={density:.4f}")
            continue

        # Simplify shape (tolerance: 0.5% of the perimeter) and add.
        epsilon = 0.005 * cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, epsilon, True)
        bubbles.append([(int(p[0][0]), int(p[0][1])) for p in approx])

    print(f"🎈 Heuristic Bubbles (HSV + TextCheck): {len(bubbles)}")
    return bubbles
def merge_overlapping_bubbles(bubbles, iou_threshold=0.3):
    """
    Merge bubbles that overlap significantly.

    Each not-yet-merged polygon seeds a group; every later polygon whose
    intersection-over-union with that seed exceeds ``iou_threshold`` joins
    the group, and the group is replaced by its geometric union. Overlap is
    measured against the seed only, so chains (A~B, B~C, but not A~C) are
    not merged transitively.

    Args:
        bubbles: List of polygons, each a sequence of (x, y) points.
        iou_threshold: Minimum IoU with the seed for two bubbles to merge.

    Returns:
        List of polygons as lists of integer (x, y) tuples (exterior ring
        only). With zero or one input bubble the input is returned as is.
    """
    if len(bubbles) <= 1:
        return bubbles

    def _exterior_points(polygon):
        # Drop the closing vertex, which repeats the first one.
        return [(int(x), int(y)) for x, y in polygon.exterior.coords[:-1]]

    shapes = []
    for b in bubbles:
        try:
            p = Polygon(b)
            if not p.is_valid:
                p = p.buffer(0)  # standard repair for self-intersections
        except Exception:
            # Best effort: a point list that cannot form a polygon is skipped.
            continue
        if p.is_empty:
            # Repair can collapse a degenerate outline to nothing; keeping
            # it would emit a polygon with no vertices.
            continue
        shapes.append(p)

    merged_polys = []
    used = set()

    for i, s1 in enumerate(shapes):
        if i in used:
            continue

        group = [s1]
        used.add(i)

        for j, s2 in enumerate(shapes[i + 1 :], start=i + 1):
            if j in used:
                continue
            inter = s1.intersection(s2).area
            union = s1.union(s2).area
            iou = inter / union if union > 0 else 0.0
            if iou > iou_threshold:
                group.append(s2)
                used.add(j)

        merged_shape = unary_union(group)
        if merged_shape.geom_type == "Polygon":
            parts = [merged_shape]
        else:
            # Multi-part result: keep only the polygonal pieces.
            parts = [g for g in merged_shape.geoms if g.geom_type == "Polygon"]
        merged_polys.extend(_exterior_points(g) for g in parts if not g.is_empty)

    print(f"🔄 merge_overlapping_bubbles: {len(bubbles)} → {len(merged_polys)}")
    return merged_polys


def filter_nested_bubbles(bubbles):
    """
    Remove bubbles completely inside other bubbles; keep larger ones.

    Polygons are processed from largest to smallest area and a polygon is
    dropped when an already-kept polygon contains it. Comparing only
    against kept (larger or equal) polygons means that of two identical
    bubbles exactly one survives, instead of each eliminating the other.

    Args:
        bubbles: List of polygons, each a sequence of (x, y) points.

    Returns:
        The surviving polygons (the original point lists), ordered by
        decreasing area. With zero or one input bubble the input is
        returned as is. Point lists that cannot form a polygon are dropped.
    """
    if len(bubbles) <= 1:
        return bubbles

    shapes = []
    for b in bubbles:
        try:
            p = Polygon(b)
            if not p.is_valid:
                p = p.buffer(0)  # standard repair for self-intersections
        except Exception:
            # Best effort: a point list that cannot form a polygon is skipped.
            continue
        shapes.append((p, b))

    # Largest first: a container can never have a smaller area than what it
    # contains, so only earlier entries are candidates.
    shapes.sort(key=lambda item: item[0].area, reverse=True)

    kept_shapes = []
    filtered = []
    for shape, poly in shapes:
        # Containment is transitive, so checking the kept polygons is enough.
        if any(outer.contains(shape) for outer in kept_shapes):
            continue
        kept_shapes.append(shape)
        filtered.append(poly)

    # Count only genuinely nested bubbles, not inputs that failed to build.
    removed = len(shapes) - len(filtered)
    if removed:
        print(f"🗑️ filter_nested_bubbles: removed {removed} nested")
    return filtered


def detect_speech_bubbles_robust(img_pil, min_area=500, merge_overlaps=True, filter_nested_flag=True):
    """
    Robust bubble detection with post-processing.
    This is the recommended function to use.

    Runs ``detect_bubbles_heuristic`` and then, depending on the flags,
    ``merge_overlapping_bubbles`` and ``filter_nested_bubbles``.

    Args:
        img_pil: Page image, forwarded to ``detect_bubbles_heuristic``.
        min_area: Minimum contour area, forwarded to the detector.
        merge_overlaps: Merge significantly overlapping bubbles when true.
        filter_nested_flag: Drop bubbles fully inside another when true.

    Returns:
        List of bubble polygons [(x,y), ...]; empty when nothing is detected.
    """
    bubbles = detect_bubbles_heuristic(img_pil, min_area=min_area)

    if not bubbles:
        print("⚠️ detect_speech_bubbles_robust: no initial bubbles")
        return []

    # Merge before filtering so containment is judged on merged outlines.
    if merge_overlaps:
        bubbles = merge_overlapping_bubbles(bubbles)

    if filter_nested_flag:
        bubbles = filter_nested_bubbles(bubbles)

    print(f"✅ detect_speech_bubbles_robust: final {len(bubbles)} bubbles")
    return bubbles