File size: 13,582 Bytes
54a7b7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ab378c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54a7b7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ab378c
54a7b7b
 
2ab378c
54a7b7b
 
 
 
 
2ab378c
54a7b7b
 
 
2ab378c
54a7b7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ab378c
54a7b7b
 
 
 
 
 
 
 
2ab378c
54a7b7b
 
2ab378c
54a7b7b
 
 
2ab378c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54a7b7b
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
from ultralytics import YOLO
import numpy as np
import cv2

# Global cache of loaded YOLO models, keyed by model path, so that repeated
# detection calls do not reload the model on every call
_yolo_model_cache = {}

# Configuration for long (tall) image handling
MAX_ASPECT_RATIO = 3.0  # Images with height/width above this are sliced into chunks
MIN_CHUNK_HEIGHT = 800  # Minimum distance in pixels between consecutive gutter cuts
MAX_CHUNK_HEIGHT = 1500  # Target chunk height; also the slice height for overlap-based slicing
GUTTER_MIN_HEIGHT = 10  # Minimum number of consecutive gutter rows for a gutter to be valid
OVERLAP_SIZE = 200  # Rows shared by consecutive chunks when no gutter is found
WHITE_THRESHOLD = 245  # Row mean intensity above this counts as a "white" gutter row
BLACK_THRESHOLD = 15   # Row mean intensity below this counts as a "black" gutter row
IOU_THRESHOLD = 0.5    # Detections reaching this IoU with a higher-confidence one are dropped

# Black bubble detection constants
BLACK_BUBBLE_THRESHOLD = 50  # Max grayscale intensity treated as a black region
BLACK_BUBBLE_MIN_AREA = 1000  # Minimum contour area in pixels
BLACK_BUBBLE_MAX_AREA_RATIO = 0.4  # Maximum contour area as a fraction of the image area
BLACK_BUBBLE_MIN_ASPECT = 0.2  # Minimum bounding-box width/height ratio
BLACK_BUBBLE_MAX_ASPECT = 5.0  # Maximum bounding-box width/height ratio


def detect_black_bubbles(image, min_area=None, max_area_ratio=None):
    """Detect black speech bubbles using OpenCV contour detection.

    Used as a fallback for dark bubbles that the YOLO model does not detect.

    Args:
        image: Input image as a numpy array. BGR is expected; single-channel
            (grayscale) arrays are used as-is without colour conversion.
        min_area: Minimum contour area in pixels
            (default: BLACK_BUBBLE_MIN_AREA).
        max_area_ratio: Maximum contour area as a fraction of the image area
            (default: BLACK_BUBBLE_MAX_AREA_RATIO).

    Returns:
        list: Detections in the format [x1, y1, x2, y2, confidence, class_id].
            Empty when ``image`` is None or contains no pixels.
    """
    # Nothing to analyse: bail out first, the processing below cannot handle
    # a missing or zero-sized image.
    if image is None or image.size == 0:
        return []

    if min_area is None:
        min_area = BLACK_BUBBLE_MIN_AREA
    if max_area_ratio is None:
        max_area_ratio = BLACK_BUBBLE_MAX_AREA_RATIO

    height, width = image.shape[:2]
    max_area = int(width * height * max_area_ratio)

    # Work on a single-channel image; only convert when colour channels exist.
    if image.ndim == 2:
        gray = image
    elif image.shape[2] == 1:
        gray = np.ascontiguousarray(image[:, :, 0])
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Inverted threshold: dark pixels become foreground (255).
    _, thresh = cv2.threshold(gray, BLACK_BUBBLE_THRESHOLD, 255, cv2.THRESH_BINARY_INV)

    # Close small holes, then open to remove small specks.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    detections = []

    for contour in contours:
        area = cv2.contourArea(contour)

        # Filter by area
        if area < min_area or area > max_area:
            continue

        x, y, w, h = cv2.boundingRect(contour)

        # Filter by aspect ratio (bubbles are usually somewhat round/oval)
        aspect_ratio = w / h if h > 0 else 0
        if aspect_ratio < BLACK_BUBBLE_MIN_ASPECT or aspect_ratio > BLACK_BUBBLE_MAX_ASPECT:
            continue

        # The contour must fill at least 30% of its bounding box, which
        # rejects thin borders and outlines.
        rect_area = w * h
        fill_ratio = area / rect_area if rect_area > 0 else 0
        if fill_ratio < 0.3:
            continue

        # Verify the bounding box is actually dark on average; the +30 gives
        # some tolerance over the per-pixel threshold.
        roi = gray[y:y+h, x:x+w]
        mean_intensity = np.mean(roi)
        if mean_intensity > BLACK_BUBBLE_THRESHOLD + 30:
            continue

        # Confidence grows with fill ratio and darkness, capped at 0.8.
        # Cast to a plain float so rows hold the same value types as the
        # YOLO rows produced via ``.tolist()``.
        confidence = float(min(0.8, fill_ratio * (1 - mean_intensity / 255)))

        x1, y1, x2, y2 = x, y, x + w, y + h
        detections.append([x1, y1, x2, y2, confidence, 0])  # class_id=0 for speech bubble

    return detections


def find_safe_cut_points(image, target_height=MAX_CHUNK_HEIGHT):
    """Locate gutter rows where a tall image can be split between panels.

    A row counts as gutter when its mean grayscale intensity is above
    WHITE_THRESHOLD or below BLACK_THRESHOLD. Runs of at least
    GUTTER_MIN_HEIGHT consecutive gutter rows are candidates, and the centre
    row of a run is used as the cut position.

    Args:
        image: Input image (numpy array, BGR).
        target_height: Approximate target height for each chunk.

    Returns:
        list: y-coordinates (ascending) where it is safe to cut; empty when
            no suitable gutter exists.
    """
    luminance = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    row_brightness = np.mean(luminance, axis=1)
    gutter_mask = (row_brightness > WHITE_THRESHOLD) | (row_brightness < BLACK_THRESHOLD)

    # Pad with a non-gutter row on both sides so every run, including one that
    # touches the top or bottom edge, has a rising and a falling edge.
    padded = np.concatenate(([False], gutter_mask, [False])).astype(np.int8)
    transitions = np.diff(padded)
    run_starts = np.flatnonzero(transitions == 1)
    run_ends = np.flatnonzero(transitions == -1)  # exclusive end row of each run

    # Centre row of every run that is tall enough to be a real gutter.
    centers = [
        (int(first) + int(stop)) // 2
        for first, stop in zip(run_starts, run_ends)
        if stop - first >= GUTTER_MIN_HEIGHT
    ]

    # A gutter is used only when it lies far enough below the previous cut:
    # at least MIN_CHUNK_HEIGHT and at least 70% of the target height.
    required_gap = max(MIN_CHUNK_HEIGHT, target_height * 0.7)

    cut_points = []
    previous_cut = 0
    for center in centers:
        if center - previous_cut >= required_gap:
            cut_points.append(center)
            previous_cut = center

    return cut_points


def calculate_iou(box1, box2):
    """Return the Intersection over Union (IoU) of two boxes.

    Only the first four values of each box are read, as (x1, y1, x2, y2);
    any extra trailing values are ignored. Returns 0.0 when the boxes do not
    overlap or the union area is not positive.
    """
    ax1, ay1, ax2, ay2 = box1[:4]
    bx1, by1, bx2, by2 = box2[:4]

    # Corners of the overlapping rectangle, if there is one.
    left, top = max(ax1, bx1), max(ay1, by1)
    right, bottom = min(ax2, bx2), min(ay2, by2)

    if right <= left or bottom <= top:
        return 0.0

    overlap = (right - left) * (bottom - top)
    combined = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - overlap

    if combined > 0:
        return overlap / combined
    return 0.0


def remove_duplicate_detections(detections, iou_threshold=IOU_THRESHOLD):
    """Drop overlapping detections, preferring the higher-confidence one.

    Detections are visited from highest to lowest confidence (index 4). One
    is kept only if its IoU with every detection already kept is below
    ``iou_threshold``. Inputs with fewer than two detections are returned
    unchanged.
    """
    if len(detections) < 2:
        return detections

    survivors = []
    for candidate in sorted(detections, key=lambda det: det[4], reverse=True):
        overlaps_kept = any(
            not calculate_iou(kept, candidate) < iou_threshold
            for kept in survivors
        )
        if not overlaps_kept:
            survivors.append(candidate)

    return survivors


def detect_bubbles_on_chunks(model, image, cut_points):
    """Run the model on horizontal slices of the image and merge the results.

    The image is split at ``cut_points``; each slice at least 50 rows tall is
    passed to the model, and the y-coordinates of its detections are shifted
    back into full-image space before duplicates are removed.

    Args:
        model: Loaded YOLO model; called as ``model(chunk, verbose=False)``.
        image: Full image (numpy array).
        cut_points: List of y-coordinates to cut at.

    Returns:
        list: Merged bubble detections with adjusted coordinates.
    """
    edges = [0] + cut_points + [image.shape[0]]
    chunk_count = len(edges) - 1

    print(f"Processing image in {chunk_count} chunks...")

    collected = []
    for number, (top, bottom) in enumerate(zip(edges, edges[1:]), start=1):
        strip = image[top:bottom]

        # Slices this thin are skipped entirely (and not reported).
        if strip.shape[0] < 50:
            continue

        prediction = model(strip, verbose=False)[0]
        rows = prediction.boxes.data.tolist()

        # Shift y1 / y2 from slice coordinates to full-image coordinates.
        for row in rows:
            row[1] += top
            row[3] += top
        collected.extend(rows)

        print(f"  Chunk {number}: y={top}-{bottom}, found {len(rows)} bubbles")

    deduplicated = remove_duplicate_detections(collected)
    print(f"Total: {len(collected)} detections → {len(deduplicated)} after merge")

    return deduplicated


def detect_bubbles_with_fallback(model, image, chunk_height=None, overlap=None):
    """Detect bubbles on overlapping horizontal slices of a tall image.

    Used when no gutters are available to cut at. Consecutive slices share
    ``overlap`` rows; the resulting duplicate detections are removed by IoU.

    Args:
        model: Loaded YOLO model; called as ``model(chunk, verbose=False)``.
        image: Full image (numpy array).
        chunk_height: Slice height in rows (default: MAX_CHUNK_HEIGHT).
        overlap: Rows shared by consecutive slices (default: OVERLAP_SIZE).

    Returns:
        list: Merged bubble detections in full-image coordinates.

    Raises:
        ValueError: If ``chunk_height`` is not positive or ``overlap`` is not
            in ``[0, chunk_height)``; such values would stall the slicing loop.
    """
    if chunk_height is None:
        chunk_height = MAX_CHUNK_HEIGHT
    if overlap is None:
        overlap = OVERLAP_SIZE

    # Each step advances by (chunk_height - overlap) rows, so that difference
    # must be positive or the loop below would never reach the bottom.
    if chunk_height <= 0 or not 0 <= overlap < chunk_height:
        raise ValueError(
            f"overlap ({overlap}) must be >= 0 and smaller than "
            f"chunk_height ({chunk_height}), which must be positive"
        )

    height = image.shape[0]
    all_detections = []

    y = 0
    chunk_num = 0

    print("No gutters found. Using overlap-based slicing...")

    while y < height:
        y_end = min(y + chunk_height, height)
        chunk = image[y:y_end]

        # A remainder thinner than 50 rows is not processed.
        if chunk.shape[0] < 50:
            break

        results = model(chunk, verbose=False)[0]
        chunk_detections = results.boxes.data.tolist()

        # Shift y1 / y2 from slice coordinates to full-image coordinates.
        for det in chunk_detections:
            det[1] += y
            det[3] += y
        all_detections.extend(chunk_detections)

        chunk_num += 1
        print(f"  Chunk {chunk_num}: y={y}-{y_end}, found {len(chunk_detections)} bubbles")

        # The slice that reaches the bottom of the image is the last one.
        if y_end >= height:
            break

        # Start the next slice `overlap` rows above the end of this one.
        y = y_end - overlap

    # Detections inside the shared rows can appear twice; drop the duplicates.
    merged = remove_duplicate_detections(all_detections)
    print(f"Total: {len(all_detections)} detections → {len(merged)} after merge")

    return merged


def detect_bubbles(model_path, image_input, enable_black_bubble=True):
    """Detect speech bubbles in an image using a YOLO model.

    Optionally also detects black speech bubbles with an OpenCV fallback, and
    handles long vertical images (webtoons) by slicing them before detection.

    Args:
        model_path (str): The file path to the YOLO model. The loaded model is
            cached per path.
        image_input: Path to an image file (``str`` or ``os.PathLike``) OR a
            numpy array (BGR).
        enable_black_bubble (bool): Whether to detect black bubbles using OpenCV.

    Returns:
        list: One row per detected bubble holding the coordinates, score and
            class_id, followed by an is_dark_bubble flag (1 for OpenCV
            black-bubble detections, 0 for YOLO detections). Empty when the
            image cannot be loaded or has no pixels.
    """
    # Local import: needed only for path handling in this function.
    import os

    # Cache model to avoid reloading (~2-5s savings per image)
    if model_path not in _yolo_model_cache:
        print(f"Loading YOLO model from {model_path}...")
        _yolo_model_cache[model_path] = YOLO(model_path)
        print("YOLO model loaded and cached!")

    model = _yolo_model_cache[model_path]

    # Load image if a path (str or path-like object) is provided
    if isinstance(image_input, (str, os.PathLike)):
        image = cv2.imread(os.fspath(image_input))
    else:
        image = image_input

    if image is None:
        return []

    height, width = image.shape[:2]
    # A zero-sized image has nothing to detect and would divide by zero below.
    if height == 0 or width == 0:
        return []
    aspect_ratio = height / width

    # Get YOLO detections
    if aspect_ratio > MAX_ASPECT_RATIO:
        print(f"Long image detected: {width}x{height} (ratio: {aspect_ratio:.1f})")

        # Try to find safe cut points (gutters)
        cut_points = find_safe_cut_points(image)

        if cut_points:
            print(f"Found {len(cut_points)} safe cut points (gutters)")
            yolo_detections = detect_bubbles_on_chunks(model, image, cut_points)
        else:
            # Fallback to overlap-based slicing
            yolo_detections = detect_bubbles_with_fallback(model, image)
    else:
        # Normal image - process directly
        bubbles = model(image, verbose=False)[0]
        yolo_detections = bubbles.boxes.data.tolist()

    # Mark YOLO detections as normal bubbles (is_dark_bubble = 0); rows that
    # do not have exactly six values are left as they are.
    for det in yolo_detections:
        if len(det) == 6:
            det.append(0)

    # Get black bubble detections using OpenCV (if enabled)
    if enable_black_bubble:
        black_bubble_detections = detect_black_bubbles(image)
    else:
        black_bubble_detections = []

    if not black_bubble_detections:
        # No black bubbles found, return YOLO only
        return yolo_detections

    print(f"OpenCV found {len(black_bubble_detections)} potential black bubbles")

    # Mark black bubbles with a flag (is_dark_bubble = 1)
    for det in black_bubble_detections:
        det.append(1)

    # Merge all detections and remove duplicates
    all_detections = yolo_detections + black_bubble_detections
    merged = remove_duplicate_detections(all_detections)

    print(f"Total: {len(yolo_detections)} YOLO + {len(black_bubble_detections)} black = {len(merged)} after merge")
    return merged


def clear_model_cache():
    """Empty the module-level YOLO model cache so cached models can be freed."""
    # The dict is emptied in place, so no ``global`` declaration is required.
    _yolo_model_cache.clear()