qqwjq1981 committed on
Commit
a263a61
·
verified ·
1 Parent(s): e44cb9e

Update utils/bubble_utils.py

Browse files
Files changed (1) hide show
  1. utils/bubble_utils.py +120 -40
utils/bubble_utils.py CHANGED
@@ -1,20 +1,34 @@
 
 
 
1
  import numpy as np
2
  from PIL import Image
3
  from utils.image_utils import load_and_split_image
4
  from utils.ocr_utils import extract_and_translate_chunk
5
- from utils.polygon_utils import draw_translated_text_convex, shrink_or_expand_polygon, FONT_PATH
 
 
 
 
 
 
6
  from utils.bubble_detect import detect_speech_bubbles
7
  from utils.image_utils import encode_image_to_html
8
 
9
 
10
- def bubble_pipeline_single(file_obj, num_chunks=1):
11
  """
12
- End-to-end bubble translation pipeline:
13
  1. Global bubble detection
14
  2. OCR text extraction
15
- 3. Robust bubble-text matching
16
- 4. Inpaint + redraw inside each speech bubble
17
- 5. Assign bubbles to chunks (if panel split)
 
 
 
 
 
18
  """
19
 
20
  # ----------------------------------------------------------------------
@@ -27,49 +41,55 @@ def bubble_pipeline_single(file_obj, num_chunks=1):
27
  # 2. Global speech bubble detection
28
  # ----------------------------------------------------------------------
29
  bubble_polygons = detect_speech_bubbles(full_img)
30
-
31
- if len(bubble_polygons) == 0:
32
- print("⚠️ No bubbles detected → FALLBACK to OCR-only pipeline.")
33
- return fallback_ocr_pipeline(file_obj, num_chunks)
34
 
35
  # ----------------------------------------------------------------------
36
  # 3. OCR detection (global)
37
  # ----------------------------------------------------------------------
38
  translations = extract_and_translate_chunk(full_img)
 
39
 
40
  if len(translations) == 0:
41
- print("⚠️ OCR found no text → fallback")
42
- return fallback_ocr_pipeline(file_obj, num_chunks)
43
 
44
  # ----------------------------------------------------------------------
45
- # 4. Assign each text box to its closest bubble
46
  # ----------------------------------------------------------------------
47
- bubble_centers = [np.mean(poly, axis=0) for poly in bubble_polygons]
48
-
49
- assignment = [] # (text, bubble_poly)
50
- for t in translations:
51
- text_center = np.mean(t["polygon"], axis=0)
52
- dists = [np.linalg.norm(text_center - c) for c in bubble_centers]
53
- best_idx = int(np.argmin(dists))
54
-
55
- assignment.append((t, bubble_polygons[best_idx]))
 
 
 
 
56
 
57
  # ----------------------------------------------------------------------
58
  # 5. Render onto a working copy of full image
59
  # ----------------------------------------------------------------------
60
  translated_full = full_img.copy()
61
 
62
- for t, bubble_poly in assignment:
63
- translated_text = t["translated"]
 
64
 
65
- # slightly shrink inside the bubble
66
- inner_poly = shrink_or_expand_polygon(bubble_poly, shrink_ratio=0.92)
 
 
 
67
 
68
  translated_full = draw_translated_text_convex(
69
  translated_full,
70
- inner_poly,
71
  translated_text,
72
- font_path=FONT_PATH, # uses default inside polygon_utils
73
  font_scale=1.0
74
  )
75
 
@@ -78,13 +98,9 @@ def bubble_pipeline_single(file_obj, num_chunks=1):
78
  # ----------------------------------------------------------------------
79
  if num_chunks > 1:
80
  _, _, chunks = load_and_split_image(file_obj, num_chunks)
81
- translated_chunks = []
82
-
83
- w, h = full_img.size
84
- _, _, trans_chunks = load_and_split_image_image_obj(
85
- translated_full, num_chunks
86
- )
87
- translated_chunks = trans_chunks
88
  else:
89
  chunks = [full_img]
90
  translated_chunks = [translated_full]
@@ -95,18 +111,33 @@ def bubble_pipeline_single(file_obj, num_chunks=1):
95
  orig_html = "".join([encode_image_to_html(c) for c in chunks])
96
  trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
97
 
98
- # table for manual edit
99
  table_data = [[t["original"], t["translated"]] for t in translations]
100
 
101
  return filename, orig_html, trans_html, table_data, [translations]
102
 
103
 
104
- # ========================================================================
105
- # Fallback OCR Pipeline
106
- # ========================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
- from utils.polygon_utils import render_translated_chunk
109
 
 
 
 
110
 
111
  def fallback_ocr_pipeline(file_obj, num_chunks):
112
  """
@@ -134,3 +165,52 @@ def fallback_ocr_pipeline(file_obj, num_chunks):
134
  trans = "".join([encode_image_to_html(t) for t in translated_images])
135
 
136
  return filename, orig, trans, all_tables, all_translations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Enhanced bubble detection pipeline with polygon correction
3
+ """
4
  import numpy as np
5
  from PIL import Image
6
  from utils.image_utils import load_and_split_image
7
  from utils.ocr_utils import extract_and_translate_chunk
8
+ from utils.polygon_utils import (
9
+ draw_translated_text_convex,
10
+ shrink_or_expand_polygon,
11
+ FONT_PATH,
12
+ correct_ocr_polygons_with_bubbles,
13
+ render_translated_chunk
14
+ )
15
  from utils.bubble_detect import detect_speech_bubbles
16
  from utils.image_utils import encode_image_to_html
17
 
18
 
19
+ def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid"):
20
  """
21
+ End-to-end bubble translation pipeline with polygon correction:
22
  1. Global bubble detection
23
  2. OCR text extraction
24
+ 3. Correct OCR polygons using detected bubbles
25
+ 4. Inpaint + redraw inside corrected polygons
26
+ 5. Split into chunks if needed
27
+
28
+ Args:
29
+ file_obj: Input image file
30
+ num_chunks: Number of panels to split into
31
+ polygon_strategy: How to correct polygons ("hybrid", "bubble", "intersect", "expand")
32
  """
33
 
34
  # ----------------------------------------------------------------------
 
41
  # 2. Global speech bubble detection
42
  # ----------------------------------------------------------------------
43
  bubble_polygons = detect_speech_bubbles(full_img)
44
+ print(f"🔍 Detected {len(bubble_polygons)} speech bubbles")
 
 
 
45
 
46
  # ----------------------------------------------------------------------
47
  # 3. OCR detection (global)
48
  # ----------------------------------------------------------------------
49
  translations = extract_and_translate_chunk(full_img)
50
+ print(f"📝 OCR found {len(translations)} text regions")
51
 
52
  if len(translations) == 0:
53
+ print("⚠️ OCR found no text → showing original image")
54
+ return fallback_empty(file_obj, num_chunks, full_img)
55
 
56
  # ----------------------------------------------------------------------
57
+ # 4. Correct OCR polygons using bubble detection
58
  # ----------------------------------------------------------------------
59
+ if len(bubble_polygons) > 0:
60
+ print(f"✨ Correcting OCR polygons using bubble detection (strategy: {polygon_strategy})")
61
+ translations = correct_ocr_polygons_with_bubbles(
62
+ translations,
63
+ bubble_polygons,
64
+ strategy=polygon_strategy
65
+ )
66
+
67
+ # Stats
68
+ matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
69
+ print(f"✅ Matched {matched}/{len(translations)} text regions to bubbles")
70
+ else:
71
+ print("⚠️ No bubbles detected → using original OCR polygons")
72
 
73
  # ----------------------------------------------------------------------
74
  # 5. Render onto a working copy of full image
75
  # ----------------------------------------------------------------------
76
  translated_full = full_img.copy()
77
 
78
+ for t in translations:
79
+ polygon = t.get("polygon")
80
+ translated_text = t.get("translated", "")
81
 
82
+ if not polygon or not translated_text:
83
+ continue
84
+
85
+ # Slightly shrink for better visual appearance
86
+ render_poly = shrink_or_expand_polygon(polygon, shrink_ratio=0.92)
87
 
88
  translated_full = draw_translated_text_convex(
89
  translated_full,
90
+ render_poly,
91
  translated_text,
92
+ font_path=FONT_PATH,
93
  font_scale=1.0
94
  )
95
 
 
98
  # ----------------------------------------------------------------------
99
  if num_chunks > 1:
100
  _, _, chunks = load_and_split_image(file_obj, num_chunks)
101
+
102
+ # Split translated image the same way
103
+ translated_chunks = split_image_into_chunks(translated_full, num_chunks)
 
 
 
 
104
  else:
105
  chunks = [full_img]
106
  translated_chunks = [translated_full]
 
111
  orig_html = "".join([encode_image_to_html(c) for c in chunks])
112
  trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
113
 
114
+ # Table for manual edit
115
  table_data = [[t["original"], t["translated"]] for t in translations]
116
 
117
  return filename, orig_html, trans_html, table_data, [translations]
118
 
119
 
120
def split_image_into_chunks(img, num_chunks):
    """Split an image into horizontal bands stacked from top to bottom.

    Args:
        img: Image-like object exposing ``size`` as ``(width, height)`` and
            ``crop((left, top, right, bottom))``, e.g. a PIL ``Image``.
        num_chunks: Requested number of bands.

    Returns:
        A list of crops ordered top to bottom. When fewer than two bands
        can be produced the list holds ``img`` itself (not a copy).
        Every band is ``height // n`` rows tall except the last one, which
        also absorbs the remainder rows.
    """
    if num_chunks <= 1:
        return [img]

    width, height = img.size

    # Asking for more bands than there are pixel rows would make
    # ``height // num_chunks`` zero and produce zero-height crops, so the
    # band count is capped at the number of rows available.
    band_count = min(num_chunks, height)
    if band_count <= 1:
        return [img]

    band_height = height // band_count
    tops = [index * band_height for index in range(band_count)]
    # Each band ends where the next begins; the last runs to the bottom edge.
    bottoms = tops[1:] + [height]

    return [
        img.crop((0, top, width, bottom))
        for top, bottom in zip(tops, bottoms)
    ]
136
 
 
137
 
138
+ # ========================================================================
139
+ # Fallback Pipelines
140
+ # ========================================================================
141
 
142
  def fallback_ocr_pipeline(file_obj, num_chunks):
143
  """
 
165
  trans = "".join([encode_image_to_html(t) for t in translated_images])
166
 
167
  return filename, orig, trans, all_tables, all_translations
168
+
169
+
170
def fallback_empty(file_obj, num_chunks, full_img):
    """Build the pipeline result for an image in which OCR found no text.

    The input is split with ``load_and_split_image`` and the resulting
    chunks are encoded to HTML; the same HTML is reported for both the
    original and the "translated" view because nothing was translated.

    Args:
        file_obj: Input image file, forwarded to ``load_and_split_image``.
        num_chunks: Number of chunks, forwarded to ``load_and_split_image``.
        full_img: Accepted for call-site compatibility; not used here.

    Returns:
        ``(filename, orig_html, trans_html, table_data, translations)``
        where ``table_data`` is ``[]`` and ``translations`` is ``[[]]``.
    """
    filename, _unused, panels = load_and_split_image(file_obj, num_chunks)

    # One HTML string serves as both views since there is nothing to redraw.
    panels_html = "".join(map(encode_image_to_html, panels))

    return filename, panels_html, panels_html, [], [[]]
178
+
179
+
180
+ # ========================================================================
181
+ # Debug/Visualization Utilities
182
+ # ========================================================================
183
+
184
def visualize_polygon_correction(img, translations, bubble_polygons, output_path=None):
    """Draw a debug overlay comparing OCR polygons with detected bubbles.

    Outlines drawn on a copy of ``img``:
      - each entry of ``bubble_polygons`` in blue,
      - each translation's ``"original_polygon"`` in red,
      - each translation's ``"polygon"`` in green.

    Args:
        img: PIL image to annotate; it is copied, never modified.
        translations: Iterable of dicts; the ``"original_polygon"`` and
            ``"polygon"`` keys are read and may be missing or ``None``.
        bubble_polygons: Iterable of polygons.
        output_path: If truthy, the annotated image is saved there.

    Returns:
        The annotated copy. Images whose mode is neither RGB nor RGBA are
        converted to RGB first, because an ``'RGBA'`` drawing context is
        only accepted on those two modes.

    Polygons may be nested point sequences, flat ``[x0, y0, x1, y1, ...]``
    sequences or NumPy arrays; missing and empty polygons are skipped.
    """
    import numpy as np
    from PIL import ImageDraw

    def _as_points(poly):
        """Return ``poly`` as a list of ``(x, y)`` float tuples, or ``None``."""
        if poly is None:
            return None
        coords = np.asarray(poly, dtype=float)
        # Explicit size check: the truth value of a multi-element array is
        # ambiguous, so ``if poly:`` is not safe for array-backed polygons.
        if coords.size == 0:
            return None
        # Plain tuples avoid handing Pillow a raw array buffer.
        return [tuple(point) for point in coords.reshape(-1, 2).tolist()]

    debug_img = img.copy()
    if debug_img.mode not in ("RGB", "RGBA"):
        debug_img = debug_img.convert("RGB")
    draw = ImageDraw.Draw(debug_img, 'RGBA')

    # Detected bubbles in blue
    for bubble in bubble_polygons:
        bubble_points = _as_points(bubble)
        if bubble_points is not None:
            draw.polygon(bubble_points, outline=(0, 0, 255, 128), width=2)

    for t in translations:
        # Original OCR polygon in red
        orig_points = _as_points(t.get("original_polygon"))
        if orig_points is not None:
            draw.polygon(orig_points, outline=(255, 0, 0, 128), width=2)

        # Corrected polygon in green, drawn last so it stays on top
        corrected_points = _as_points(t.get("polygon"))
        if corrected_points is not None:
            draw.polygon(corrected_points, outline=(0, 255, 0, 192), width=3)

    if output_path:
        debug_img.save(output_path)

    return debug_img