qqwjq1981 commited on
Commit
e44cb9e
·
verified ·
1 Parent(s): 2d1d7b4

Update utils/bubble_detect.py

Browse files
Files changed (1) hide show
  1. utils/bubble_detect.py +197 -187
utils/bubble_detect.py CHANGED
@@ -1,216 +1,226 @@
1
  """
2
- Enhanced bubble detection pipeline with polygon correction
3
  """
 
4
  import numpy as np
5
- from PIL import Image
6
- from utils.image_utils import load_and_split_image
7
- from utils.ocr_utils import extract_and_translate_chunk
8
- from utils.polygon_utils import (
9
- draw_translated_text_convex,
10
- shrink_or_expand_polygon,
11
- FONT_PATH,
12
- correct_ocr_polygons_with_bubbles,
13
- render_translated_chunk
14
- )
15
- from utils.bubble_detect import detect_speech_bubbles
16
- from utils.image_utils import encode_image_to_html
17
 
18
 
19
def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid"):
    """
    End-to-end bubble translation pipeline with polygon correction.

    Steps:
        1. Load the full image (no splitting yet)
        2. Global speech bubble detection
        3. Global OCR text extraction + translation
        4. Correct OCR polygons using the detected bubbles
        5. Redraw translated text inside the corrected polygons
        6. Split original and translated images into chunks if needed
        7. Encode both image sets for HTML

    Args:
        file_obj: Input image file
        num_chunks: Number of panels to split into
        polygon_strategy: How to correct polygons ("hybrid", "bubble", "intersect", "expand")

    Returns:
        Tuple (filename, orig_html, trans_html, table_data, [translations]),
        where table_data holds one [original, translated] row per OCR region.
        When OCR finds no text, the result of fallback_empty() is returned.
    """
    # ----------------------------------------------------------------------
    # 1. Load full image (no splitting yet)
    # ----------------------------------------------------------------------
    filename, full_img, _ = load_and_split_image(file_obj, num_chunks=1)

    # ----------------------------------------------------------------------
    # 2. Global speech bubble detection
    # ----------------------------------------------------------------------
    bubble_polygons = detect_speech_bubbles(full_img)
    print(f"🔍 Detected {len(bubble_polygons)} speech bubbles")

    # ----------------------------------------------------------------------
    # 3. OCR detection (global)
    # ----------------------------------------------------------------------
    translations = extract_and_translate_chunk(full_img)
    print(f"📝 OCR found {len(translations)} text regions")

    if len(translations) == 0:
        print("⚠️ OCR found no text → showing original image")
        return fallback_empty(file_obj, num_chunks, full_img)

    # ----------------------------------------------------------------------
    # 4. Correct OCR polygons using bubble detection
    # ----------------------------------------------------------------------
    if len(bubble_polygons) > 0:
        print(f"✨ Correcting OCR polygons using bubble detection (strategy: {polygon_strategy})")
        translations = correct_ocr_polygons_with_bubbles(
            translations,
            bubble_polygons,
            strategy=polygon_strategy
        )

        # Stats
        matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
        print(f"✅ Matched {matched}/{len(translations)} text regions to bubbles")
    else:
        print("⚠️ No bubbles detected → using original OCR polygons")

    # ----------------------------------------------------------------------
    # 5. Render onto a working copy of full image
    # ----------------------------------------------------------------------
    translated_full = full_img.copy()

    for t in translations:
        polygon = t.get("polygon")
        translated_text = t.get("translated", "")

        # Nothing to draw without both a region and a text
        if not polygon or not translated_text:
            continue

        # Slightly shrink for better visual appearance
        render_poly = shrink_or_expand_polygon(polygon, shrink_ratio=0.92)

        translated_full = draw_translated_text_convex(
            translated_full,
            render_poly,
            translated_text,
            font_path=FONT_PATH,
            font_scale=1.0
        )

    # ----------------------------------------------------------------------
    # 6. Split full translated image into chunks (if num_chunks > 1)
    # ----------------------------------------------------------------------
    if num_chunks > 1:
        # NOTE(review): this reads file_obj a second time - presumably
        # load_and_split_image tolerates repeated reads; confirm.
        _, _, chunks = load_and_split_image(file_obj, num_chunks)

        # Split translated image the same way
        translated_chunks = split_image_into_chunks(translated_full, num_chunks)
    else:
        chunks = [full_img]
        translated_chunks = [translated_full]

    # ----------------------------------------------------------------------
    # 7. Convert for HTML
    # ----------------------------------------------------------------------
    orig_html = "".join([encode_image_to_html(c) for c in chunks])
    trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])

    # Table for manual edit
    table_data = [[t["original"], t["translated"]] for t in translations]

    return filename, orig_html, trans_html, table_data, [translations]
118
-
119
-
120
def split_image_into_chunks(img, num_chunks):
    """
    Split PIL Image vertically into equal chunks.

    The last chunk absorbs the remainder rows when the height is not evenly
    divisible. The chunk count is capped at the image height so that no
    zero-height crop is ever produced.

    Args:
        img: Image exposing ``size`` -> (width, height) and ``crop(box)``
        num_chunks: Requested number of vertical chunks

    Returns:
        List of cropped chunks, top to bottom; ``[img]`` itself when no
        split is needed or possible.
    """
    if num_chunks <= 1:
        return [img]

    width, height = img.size

    # More chunks than pixel rows would make chunk_height 0 and yield empty crops
    num_chunks = min(num_chunks, height)
    if num_chunks <= 1:
        return [img]

    chunk_height = height // num_chunks

    # Row boundaries: evenly spaced tops, with the final bottom pinned to the image height
    bounds = [i * chunk_height for i in range(num_chunks)] + [height]

    return [
        img.crop((0, top, width, bottom))
        for top, bottom in zip(bounds, bounds[1:])
    ]
136
-
 
 
137
 
138
- # ========================================================================
139
- # Fallback Pipelines
140
- # ========================================================================
141
 
142
def fallback_ocr_pipeline(file_obj, num_chunks):
    """
    Standard OCR-based translation pipeline.
    Used when bubble detection fails or page has no bubbles.

    Each chunk is OCR'd/translated and rendered independently.

    Returns:
        Tuple (filename, orig_html, trans_html, table_rows, per_chunk_translations)
    """
    filename, _image, panels = load_and_split_image(file_obj, num_chunks)

    per_chunk_translations = []
    table_rows = []
    rendered_panels = []

    for panel in panels:
        entries = extract_and_translate_chunk(panel)
        per_chunk_translations.append(entries)

        # One [original, translated] row per OCR region, flattened across chunks
        for entry in entries:
            table_rows.append([entry["original"], entry["translated"]])

        rendered_panels.append(render_translated_chunk(panel, entries))

    orig_html = "".join(encode_image_to_html(panel) for panel in panels)
    trans_html = "".join(encode_image_to_html(rendered) for rendered in rendered_panels)

    return filename, orig_html, trans_html, table_rows, per_chunk_translations
168
-
169
-
170
def fallback_empty(file_obj, num_chunks, full_img):
    """
    Fallback when no text is detected.

    The original panels are shown on both sides; ``full_img`` is accepted
    but not used.
    """
    filename, _, panels = load_and_split_image(file_obj, num_chunks)

    # No translation to show, so the same HTML serves as original and translated
    panels_html = "".join(encode_image_to_html(panel) for panel in panels)

    return filename, panels_html, panels_html, [], [[]]
178
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
- # ========================================================================
181
- # Debug/Visualization Utilities
182
- # ========================================================================
183
 
184
def visualize_polygon_correction(img, translations, bubble_polygons, output_path=None):
    """
    Create debug visualization showing:
    - Original OCR polygons in red
    - Detected bubbles in blue
    - Corrected polygons in green

    The input image is left untouched; drawing happens on a copy, which is
    saved to ``output_path`` when one is given and returned either way.
    """
    from PIL import ImageDraw

    canvas = img.copy()
    pen = ImageDraw.Draw(canvas, 'RGBA')

    # Detected bubbles: blue
    for bubble_outline in bubble_polygons:
        pen.polygon(bubble_outline, outline=(0, 0, 255, 128), width=2)

    # (dict key, outline colour, line width) - original in red first, corrected in green on top
    ocr_layers = (
        ("original_polygon", (255, 0, 0, 128), 2),
        ("polygon", (0, 255, 0, 192), 3),
    )
    for entry in translations:
        for key, colour, line_width in ocr_layers:
            points = entry.get(key)
            if points:
                pen.polygon(points, outline=colour, width=line_width)

    if output_path:
        canvas.save(output_path)

    return canvas
 
 
1
  """
2
+ Enhanced speech bubble detection for manga
3
  """
4
+ import cv2
5
  import numpy as np
6
+ from shapely.geometry import Polygon
 
 
 
 
 
 
 
 
 
 
 
7
 
8
 
9
def detect_speech_bubbles(img_pil, min_area=500, max_area=None, debug=False):
    """
    Detect speech bubbles in manga images.

    Pipeline: grayscale -> adaptive threshold -> invert -> morphological
    close/open -> external contours -> area and aspect-ratio filtering ->
    polygon approximation.

    Args:
        img_pil: PIL Image
        min_area: Minimum bubble area in pixels
        max_area: Maximum bubble area (None = 1/4 of image)
        debug: If True, return debug info

    Returns:
        List of bubble polygons [(x,y), ...].
        When debug is True, a tuple (bubbles, debug_info) where debug_info
        holds one dict per bubble (area, aspect_ratio, circularity,
        vertices, bbox).
    """
    # Grayscale ("L") and palette ("P") images arrive as 2-D arrays, which
    # the RGB colour conversion rejects - normalise to RGB first.
    if hasattr(img_pil, "convert"):
        img_pil = img_pil.convert("RGB")
    gray = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2GRAY)

    h, w = gray.shape
    if max_area is None:
        max_area = (h * w) // 4  # Max 1/4 of image

    # Adaptive threshold handles varying lighting better
    th = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        35, 10
    )

    inv = 255 - th  # Invert the binary mask before the morphology steps

    # Close small gaps in bubble borders
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    cleaned = cv2.morphologyEx(inv, cv2.MORPH_CLOSE, kernel, iterations=2)

    # Remove small noise
    kernel_open = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_OPEN, kernel_open, iterations=1)

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x and
    # (contours, hierarchy) on 2.x/4.x; [-2] is the contour list on all of them.
    contours = cv2.findContours(cleaned, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    bubbles = []
    debug_info = []

    for cnt in contours:
        area = cv2.contourArea(cnt)

        # Filter by area
        if area < min_area or area > max_area:
            continue

        # Get bounding box
        x, y, bw, bh = cv2.boundingRect(cnt)

        # Filter by aspect ratio (too thin/wide = not a bubble)
        aspect_ratio = max(bw, bh) / (min(bw, bh) + 1)
        if aspect_ratio > 5:  # Too elongated
            continue

        # Approximate polygon
        perimeter = cv2.arcLength(cnt, True)
        epsilon = 0.01 * perimeter
        approx = cv2.approxPolyDP(cnt, epsilon, True)

        poly = [(int(p[0][0]), int(p[0][1])) for p in approx]

        # A polygon needs at least three vertices to enclose anything
        if len(poly) < 3:
            continue

        # Store bubble
        bubbles.append(poly)

        if debug:
            # Circularity is 1.0 for a perfect circle; it is reported for
            # inspection only and not used as a filter. The +1 guards
            # against division by zero.
            circularity = 4 * np.pi * area / (perimeter * perimeter + 1)
            debug_info.append({
                'area': area,
                'aspect_ratio': aspect_ratio,
                'circularity': circularity,
                'vertices': len(poly),
                'bbox': (x, y, bw, bh)
            })

    print(f"🎈 Detected {len(bubbles)} candidate bubbles")

    if debug:
        return bubbles, debug_info

    return bubbles
96
 
 
 
 
97
 
98
def merge_overlapping_bubbles(bubbles, iou_threshold=0.3):
    """
    Merge bubbles that overlap significantly.
    Useful when bubble detection creates multiple contours for one bubble.

    Grouping is greedy: each not-yet-used bubble seeds a group and absorbs
    every later bubble whose IoU with the seed exceeds the threshold.

    Args:
        bubbles: List of polygons, each a sequence of (x, y) points
        iou_threshold: Minimum intersection-over-union for two bubbles to merge

    Returns:
        List of polygons as lists of (x, y) coordinate tuples taken from the
        shapely exterior rings. The input list is returned unchanged when it
        has at most one bubble.
    """
    from shapely.geometry import Polygon
    from shapely.ops import unary_union

    if len(bubbles) <= 1:
        return bubbles

    # Convert to Shapely polygons
    shapes = []
    for b in bubbles:
        try:
            p = Polygon(b)
            if not p.is_valid:
                p = p.buffer(0)
        except Exception:
            # Best-effort: a malformed bubble (e.g. too few points) is skipped
            continue
        # buffer(0) can collapse a degenerate outline to nothing
        if not p.is_empty:
            shapes.append(p)

    # Group overlapping bubbles
    merged = []
    used = set()

    for i, shape1 in enumerate(shapes):
        if i in used:
            continue

        group = [shape1]
        used.add(i)

        for j, shape2 in enumerate(shapes[i+1:], start=i+1):
            if j in used:
                continue

            # Check overlap
            intersection = shape1.intersection(shape2).area
            union = shape1.union(shape2).area
            iou = intersection / union if union > 0 else 0

            if iou > iou_threshold:
                group.append(shape2)
                used.add(j)

        # Merge group; a repaired shape or a union may be multi-part, in
        # which case every polygonal part is emitted separately
        result = unary_union(group) if len(group) > 1 else group[0]
        merged.extend(_exterior_rings(result))

    print(f"🔄 Merged {len(bubbles)} bubbles → {len(merged)} bubbles")
    return merged


def _exterior_rings(geom):
    """Return the exterior ring (closing vertex dropped) of each non-empty Polygon in geom."""
    if geom.is_empty:
        return []
    if geom.geom_type == 'Polygon':
        return [list(geom.exterior.coords)[:-1]]
    # MultiPolygon / GeometryCollection: keep only the polygonal parts
    return [
        list(part.exterior.coords)[:-1]
        for part in getattr(geom, 'geoms', [])
        if part.geom_type == 'Polygon' and not part.is_empty
    ]
159
 
 
 
 
160
 
161
def filter_nested_bubbles(bubbles):
    """
    Remove bubbles that are completely inside other bubbles.
    Keeps the outer bubble.

    Of several identical bubbles exactly one is kept (identical polygons
    contain each other, so a symmetric check would discard all of them).

    Args:
        bubbles: List of polygons, each a sequence of (x, y) points

    Returns:
        The surviving original polygons, ordered by area (largest first).
        The input list is returned unchanged when it has at most one bubble.
    """
    from shapely.geometry import Polygon

    if len(bubbles) <= 1:
        return bubbles

    shapes = []
    for b in bubbles:
        try:
            p = Polygon(b)
            if not p.is_valid:
                p = p.buffer(0)
        except Exception:
            # Best-effort: a malformed bubble (e.g. too few points) is skipped
            continue
        shapes.append((p, b))

    # Sort by area (largest first)
    shapes.sort(key=lambda item: item[0].area, reverse=True)

    # A container is never smaller than what it contains, so it is always
    # processed first. Containment is transitive, so checking against the
    # bubbles already kept is sufficient.
    kept = []
    for shape, poly in shapes:
        if any(outer.contains(shape) for outer, _ in kept):
            continue
        kept.append((shape, poly))

    filtered = [poly for _, poly in kept]

    if len(filtered) < len(bubbles):
        print(f"🗑️ Removed {len(bubbles) - len(filtered)} nested bubbles")

    return filtered
204
+
205
+
206
def detect_speech_bubbles_robust(img_pil, min_area=500, merge_overlaps=True, filter_nested=True,
                                 max_area=None, iou_threshold=0.3):
    """
    Robust bubble detection with post-processing.

    This is the recommended function to use.

    Args:
        img_pil: PIL Image
        min_area: Minimum bubble area in pixels
        merge_overlaps: If True, merge bubbles that overlap significantly
        filter_nested: If True, drop bubbles lying completely inside another
        max_area: Maximum bubble area, forwarded to detect_speech_bubbles
            (None keeps that function's default)
        iou_threshold: Overlap threshold forwarded to merge_overlapping_bubbles

    Returns:
        List of bubble polygons; empty list when nothing is detected.
    """
    # Initial detection
    bubbles = detect_speech_bubbles(img_pil, min_area=min_area, max_area=max_area)

    if len(bubbles) == 0:
        return []

    # Post-processing
    if merge_overlaps:
        bubbles = merge_overlapping_bubbles(bubbles, iou_threshold=iou_threshold)

    if filter_nested:
        bubbles = filter_nested_bubbles(bubbles)

    print(f"✅ Final: {len(bubbles)} speech bubbles")
    return bubbles