Spaces:
Sleeping
Sleeping
| """ | |
| Enhanced speech bubble detection for manga | |
| """ | |
| import cv2 | |
| import numpy as np | |
| from shapely.geometry import Polygon | |
| from shapely.ops import unary_union | |
def detect_speech_bubbles(img_pil, min_area=500, max_area=None, debug=False):
    """
    Basic speech bubble detection using adaptive threshold + morphology.

    Args:
        img_pil: Page image. Expected to be a PIL image (or anything
            ``np.array`` turns into an RGB array). PIL images whose mode is
            not "RGB" are converted to RGB before processing.
        min_area: Contours whose area (in pixels) is below this are dropped.
        max_area: Contours whose area (in pixels) is above this are dropped.
            Defaults to a quarter of the image area.
        debug: Accepted for signature compatibility; not used by this function.

    Returns:
        List of bubble polygons, each a list of integer (x, y) tuples.
    """
    # cv2.COLOR_RGB2BGR needs a 3- or 4-channel array, so a grayscale or
    # palette PIL image would raise below unless it is converted first.
    # Objects without a ``mode`` attribute (e.g. numpy arrays) pass through.
    if getattr(img_pil, "mode", "RGB") != "RGB":
        img_pil = img_pil.convert("RGB")

    img = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    h, w = gray.shape
    if max_area is None:
        max_area = (h * w) // 4  # bubbles should not be entire page

    th = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        35,
        10,
    )
    # Invert so that pixels below the local threshold become foreground (255).
    inv = 255 - th

    # Close first to bridge small gaps, then open to remove small specks.
    kernel_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    cleaned = cv2.morphologyEx(inv, cv2.MORPH_CLOSE, kernel_close, iterations=2)
    kernel_open = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_OPEN, kernel_open, iterations=1)

    contours, _ = cv2.findContours(cleaned, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    bubbles = []
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area < min_area or area > max_area:
            continue

        # Reject very elongated regions; the +1 guards the denominator.
        _, _, bw, bh = cv2.boundingRect(cnt)
        aspect_ratio = max(bw, bh) / (min(bw, bh) + 1)
        if aspect_ratio > 5:
            continue

        perimeter = cv2.arcLength(cnt, True)
        if perimeter == 0:
            continue

        # Simplify the contour with a tolerance of 1% of its perimeter.
        approx = cv2.approxPolyDP(cnt, 0.01 * perimeter, True)
        bubbles.append([(int(p[0][0]), int(p[0][1])) for p in approx])

    print(f"π detect_speech_bubbles: {len(bubbles)} candidates")
    return bubbles
def detect_bubbles_heuristic(img_pil, min_area=500, debug=False):
    """
    Detect speech bubbles as bright, solid regions that contain edge detail.

    Pipeline: HSV mask of bright, low-saturation pixels -> morphological
    close/open -> external contours -> geometric filters (area, aspect
    ratio, solidity) -> Canny edge-density check inside each contour.

    Args:
        img_pil: Page image. Expected to be a PIL image (or anything
            ``np.array`` turns into an RGB array). PIL images whose mode is
            not "RGB" are converted to RGB before processing.
        min_area: Contours whose area (in pixels) is below this are dropped.
        debug: When true, print a line for every region rejected by the
            edge-density check.

    Returns:
        List of bubble polygons, each a list of integer (x, y) tuples.
    """
    # cv2.COLOR_RGB2BGR needs a 3- or 4-channel array, so a grayscale or
    # palette PIL image would raise below unless it is converted first.
    # Objects without a ``mode`` attribute (e.g. numpy arrays) pass through.
    if getattr(img_pil, "mode", "RGB") != "RGB":
        img_pil = img_pil.convert("RGB")

    # 1. Convert to OpenCV format
    img = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
    h, w = img.shape[:2]

    # 2. HSV masking: keep pixels with saturation <= 40 and value >= 215
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lower_white = np.array([0, 0, 215])
    upper_white = np.array([180, 40, 255])
    mask = cv2.inRange(hsv, lower_white, upper_white)

    # Clean up mask: close with a large kernel, then open with a small one
    kernel_close = np.ones((15, 15), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel_close)
    kernel_open = np.ones((5, 5), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel_open)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Pre-compute the edge map once; it is sampled inside every contour below.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 100, 200)

    bubbles = []
    for cnt in contours:
        area = cv2.contourArea(cnt)

        # --- Standard geometric filters ---
        # Drop regions that are too small or cover more than 40% of the page.
        if area < min_area or area > (h * w * 0.4):
            continue

        _, _, bw, bh = cv2.boundingRect(cnt)
        aspect_ratio = float(bw) / bh
        if aspect_ratio < 0.2 or aspect_ratio > 5.0:
            continue

        # Solidity = contour area / convex hull area; low values mean a
        # strongly concave shape.
        hull_area = cv2.contourArea(cv2.convexHull(cnt))
        if hull_area == 0:
            continue
        solidity = float(area) / hull_area
        if solidity < 0.7:
            continue

        # --- "Has text?" filter ---
        # Rasterise just this contour, then count Canny edge pixels inside it.
        curr_mask = np.zeros_like(gray)
        cv2.drawContours(curr_mask, [cnt], -1, 255, -1)
        bubble_edges = cv2.bitwise_and(edges, edges, mask=curr_mask)
        edge_pixel_count = cv2.countNonZero(bubble_edges)

        # Density = edge pixels / contour area. The original author's notes:
        # text bubbles are typically > 0.02, empty white regions < 0.01.
        density = edge_pixel_count / area
        if density < 0.015:
            if debug:
                print(f"Skipping white blob (Empty): density={density:.4f}")
            continue

        # Simplify the shape (tolerance: 0.5% of the perimeter) and keep it.
        epsilon = 0.005 * cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, epsilon, True)
        bubbles.append([(int(p[0][0]), int(p[0][1])) for p in approx])

    print(f"π Heuristic Bubbles (HSV + TextCheck): {len(bubbles)}")
    return bubbles
def merge_overlapping_bubbles(bubbles, iou_threshold=0.3):
    """
    Merge bubbles that overlap significantly.

    Each polygon that has not yet been grouped acts as a seed. Every later
    ungrouped polygon whose IoU with the seed exceeds ``iou_threshold`` joins
    the seed's group (candidates are compared against the seed only, not
    against other group members). Each group is replaced by its union.

    Args:
        bubbles: List of polygons, each a sequence of (x, y) points.
        iou_threshold: Minimum intersection-over-union for two polygons to be
            merged.

    Returns:
        List of merged polygons as lists of integer (x, y) tuples. When
        ``bubbles`` has at most one entry it is returned unchanged.
    """
    if len(bubbles) <= 1:
        return bubbles

    shapes = []
    for b in bubbles:
        try:
            p = Polygon(b)
            if not p.is_valid:
                # buffer(0) is the usual way to repair self-intersections.
                p = p.buffer(0)
        except Exception:
            # Best effort: point lists that cannot form a polygon are skipped.
            continue
        # A degenerate input (e.g. collinear points) can repair to an empty
        # geometry; it has no outline to emit, so drop it here.
        if p.is_empty:
            continue
        shapes.append(p)

    merged_polys = []
    used = set()
    for i, s1 in enumerate(shapes):
        if i in used:
            continue
        group = [s1]
        used.add(i)
        for j, s2 in enumerate(shapes[i + 1 :], start=i + 1):
            if j in used:
                continue
            inter = s1.intersection(s2).area
            union = s1.union(s2).area
            iou = inter / union if union > 0 else 0.0
            if iou > iou_threshold:
                group.append(s2)
                used.add(j)

        merged_shape = unary_union(group)
        # The union is either a single Polygon or a multi-part geometry;
        # normalise to an iterable of parts and emit each polygon's outline.
        if merged_shape.geom_type == "Polygon":
            parts = [merged_shape]
        else:
            parts = getattr(merged_shape, "geoms", ())
        for g in parts:
            if g.geom_type == "Polygon" and not g.is_empty:
                # Drop the closing coordinate, which repeats the first one.
                merged_polys.append([(int(x), int(y)) for x, y in g.exterior.coords[:-1]])

    print(f"π merge_overlapping_bubbles: {len(bubbles)} β {len(merged_polys)}")
    return merged_polys
def filter_nested_bubbles(bubbles):
    """
    Remove bubbles completely inside other bubbles; keep larger ones.

    Polygons are processed from largest to smallest area, and a polygon is
    dropped when an already-kept polygon contains it. Comparing only against
    kept (equal or larger) polygons means that of several identical polygons
    exactly one survives, instead of each one eliminating the others.

    Args:
        bubbles: List of polygons, each a sequence of (x, y) points.

    Returns:
        The surviving input polygons, ordered by decreasing area. When
        ``bubbles`` has at most one entry it is returned unchanged.
    """
    if len(bubbles) <= 1:
        return bubbles

    shapes = []
    for b in bubbles:
        try:
            p = Polygon(b)
            if not p.is_valid:
                # buffer(0) is the usual way to repair self-intersections.
                p = p.buffer(0)
            shapes.append((p, b))
        except Exception:
            # Best effort: point lists that cannot form a polygon are skipped.
            continue

    shapes.sort(key=lambda item: item[0].area, reverse=True)

    # A container can never have a smaller area than what it contains, so
    # every possible container of a shape has already been visited. If that
    # container was itself dropped, whatever contained it was kept and also
    # contains this shape.
    kept = []
    for shape, poly in shapes:
        if any(outer.contains(shape) for outer, _ in kept):
            continue
        kept.append((shape, poly))

    filtered = [poly for _, poly in kept]
    if len(filtered) < len(bubbles):
        print(f"ποΈ filter_nested_bubbles: removed {len(bubbles) - len(filtered)} nested")
    return filtered
def detect_speech_bubbles_robust(img_pil, min_area=500, merge_overlaps=True, filter_nested_flag=True):
    """
    Detect speech bubbles and post-process the result.

    Runs the heuristic detector, then optionally merges overlapping polygons
    and optionally removes nested ones. This is the recommended entry point.

    Args:
        img_pil: Page image, passed through to the heuristic detector.
        min_area: Minimum contour area, passed through to the detector.
        merge_overlaps: When true, run the overlap-merging step.
        filter_nested_flag: When true, run the nested-bubble filter.

    Returns:
        List of bubble polygons; an empty list when the detector finds none.
    """
    found = detect_bubbles_heuristic(img_pil, min_area=min_area)

    if found:
        # Each post-processing step is skipped when its flag is off.
        merged = merge_overlapping_bubbles(found) if merge_overlaps else found
        final = filter_nested_bubbles(merged) if filter_nested_flag else merged
        print(f"β detect_speech_bubbles_robust: final {len(final)} bubbles")
        return final

    print("β οΈ detect_speech_bubbles_robust: no initial bubbles")
    return []