""" Enhanced speech bubble detection for manga """ import cv2 import numpy as np from shapely.geometry import Polygon from shapely.ops import unary_union def detect_speech_bubbles(img_pil, min_area=500, max_area=None, debug=False): """ Basic speech bubble detection using adaptive threshold + morphology. Returns: List of bubble polygons [(x,y), ...] """ img = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR) gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) h, w = gray.shape if max_area is None: max_area = (h * w) // 4 # bubbles should not be entire page th = cv2.adaptiveThreshold( gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 35, 10, ) inv = 255 - th # bubbles → white kernel_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7)) cleaned = cv2.morphologyEx(inv, cv2.MORPH_CLOSE, kernel_close, iterations=2) kernel_open = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)) cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_OPEN, kernel_open, iterations=1) contours, _ = cv2.findContours(cleaned, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) bubbles = [] for cnt in contours: area = cv2.contourArea(cnt) if area < min_area or area > max_area: continue x, y, bw, bh = cv2.boundingRect(cnt) aspect_ratio = max(bw, bh) / (min(bw, bh) + 1) if aspect_ratio > 5: continue perimeter = cv2.arcLength(cnt, True) if perimeter == 0: continue circularity = 4 * np.pi * area / (perimeter * perimeter + 1) epsilon = 0.01 * perimeter approx = cv2.approxPolyDP(cnt, epsilon, True) poly = [(int(p[0][0]), int(p[0][1])) for p in approx] bubbles.append(poly) print(f"🎈 detect_speech_bubbles: {len(bubbles)} candidates") return bubbles def detect_bubbles_heuristic(img_pil, min_area=500, debug=False): # 1. Convert to OpenCv format img = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR) h, w = img.shape[:2] # 2. HSV Masking (Bright regions) hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) lower_white = np.array([0, 0, 215]) upper_white = np.array([180, 40, 255]) mask = cv2.inRange(hsv, lower_white, upper_white) # Clean up mask kernel_close = np.ones((15, 15), np.uint8) mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel_close) kernel_open = np.ones((5, 5), np.uint8) mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel_open) contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) bubbles = [] # Pre-compute edge map for texture checking # Canny detects text characters very well gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) edges = cv2.Canny(gray, 100, 200) for cnt in contours: area = cv2.contourArea(cnt) # --- Standard Geometric Filters --- if area < min_area or area > (h * w * 0.4): continue x, y, bw, bh = cv2.boundingRect(cnt) aspect_ratio = float(bw) / bh if aspect_ratio < 0.2 or aspect_ratio > 5.0: continue hull = cv2.convexHull(cnt) hull_area = cv2.contourArea(hull) if hull_area == 0: continue solidity = float(area) / hull_area if solidity < 0.7: continue # --- NEW: "Has Text?" Filter --- # 1. Create a mask for just this current contour curr_mask = np.zeros_like(gray) cv2.drawContours(curr_mask, [cnt], -1, 255, -1) # 2. Look at the Canny Edges INSIDE this contour # Text creates a lot of high-frequency edges. A plain white shirt does not. bubble_edges = cv2.bitwise_and(edges, edges, mask=curr_mask) edge_pixel_count = cv2.countNonZero(bubble_edges) # Density = Edge Pixels / Total Area # Typical text bubbles have density > 0.02 (2%) # Empty white walls usually have density < 0.01 density = edge_pixel_count / area if density < 0.015: if debug: print(f"Skipping white blob (Empty): density={density:.4f}") continue # Simplify shape and add epsilon = 0.005 * cv2.arcLength(cnt, True) approx = cv2.approxPolyDP(cnt, epsilon, True) poly = [(int(p[0][0]), int(p[0][1])) for p in approx] bubbles.append(poly) print(f"🎈 Heuristic Bubbles (HSV + TextCheck): {len(bubbles)}") return bubbles def merge_overlapping_bubbles(bubbles, iou_threshold=0.3): """ Merge bubbles that overlap significantly. """ if len(bubbles) <= 1: return bubbles shapes = [] for b in bubbles: try: p = Polygon(b) if not p.is_valid: p = p.buffer(0) shapes.append(p) except Exception: continue merged_polys = [] used = set() for i, s1 in enumerate(shapes): if i in used: continue group = [s1] used.add(i) for j, s2 in enumerate(shapes[i + 1 :], start=i + 1): if j in used: continue inter = s1.intersection(s2).area union = s1.union(s2).area iou = inter / union if union > 0 else 0.0 if iou > iou_threshold: group.append(s2) used.add(j) merged_shape = unary_union(group) if merged_shape.geom_type == "Polygon": merged_polys.append([(int(x), int(y)) for x, y in merged_shape.exterior.coords[:-1]]) else: for g in merged_shape.geoms: if g.geom_type == "Polygon": merged_polys.append([(int(x), int(y)) for x, y in g.exterior.coords[:-1]]) print(f"🔄 merge_overlapping_bubbles: {len(bubbles)} → {len(merged_polys)}") return merged_polys def filter_nested_bubbles(bubbles): """ Remove bubbles completely inside other bubbles; keep larger ones. """ if len(bubbles) <= 1: return bubbles shapes = [] for b in bubbles: try: p = Polygon(b) if not p.is_valid: p = p.buffer(0) shapes.append((p, b)) except Exception: continue shapes.sort(key=lambda x: x[0].area, reverse=True) filtered = [] for i, (s1, poly1) in enumerate(shapes): is_nested = False for j, (s2, poly2) in enumerate(shapes): if i == j: continue if s2.contains(s1): is_nested = True break if not is_nested: filtered.append(poly1) if len(filtered) < len(bubbles): print(f"🗑️ filter_nested_bubbles: removed {len(bubbles) - len(filtered)} nested") return filtered def detect_speech_bubbles_robust(img_pil, min_area=500, merge_overlaps=True, filter_nested_flag=True): """ Robust bubble detection with post-processing. This is the recommended function to use. """ bubbles = detect_bubbles_heuristic(img_pil, min_area=min_area) if not bubbles: print("⚠️ detect_speech_bubbles_robust: no initial bubbles") return [] if merge_overlaps: bubbles = merge_overlapping_bubbles(bubbles) if filter_nested_flag: bubbles = filter_nested_bubbles(bubbles) print(f"✅ detect_speech_bubbles_robust: final {len(bubbles)} bubbles") return bubbles