qqwjq1981 committed on
Commit
3435684
·
verified ·
1 Parent(s): b164a37

Update utils/image_utils.py

Browse files
Files changed (1) hide show
  1. utils/image_utils.py +263 -45
utils/image_utils.py CHANGED
@@ -6,39 +6,214 @@ import base64
6
  from io import BytesIO
7
 
8
 
9
- # -----------------------------
10
- # Find low-complexity horizontal strip
11
- # -----------------------------
12
- def find_low_complexity_row(gray, target_row, search_pct=0.2):
 
13
  """
14
- Find a nearby row (within ±20%) that has low edge/text density.
 
 
 
 
 
 
 
 
15
  """
16
  h, w = gray.shape
17
- search_radius = int(h * search_pct)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- start = max(0, target_row - search_radius)
20
- end = min(h - 1, target_row + search_radius)
21
 
22
- # Edge detection
23
- edges = cv2.Canny(gray, 80, 160)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- # Row-wise edge density
26
- row_scores = edges[start:end].sum(axis=1)
27
 
28
- # Choose min edge-density row
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  best_local_idx = np.argmin(row_scores)
30
- best_row = start + best_local_idx
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
- return best_row
33
 
 
 
 
34
 
35
- # -----------------------------
36
- # Load & Split Image (Unified API)
37
- # -----------------------------
38
- def load_and_split_image(file_obj, num_chunks):
39
  """
40
- Loads an image from a file object and splits it into num_chunks using
41
- intelligent horizontal strip detection (avoids slicing through text/bubbles).
 
 
 
 
 
 
 
 
 
42
  """
43
  if file_obj is not None:
44
  image_path = file_obj.name if hasattr(file_obj, "name") else file_obj
@@ -46,52 +221,95 @@ def load_and_split_image(file_obj, num_chunks):
46
  else:
47
  image_path = "00_sample.jpg"
48
  filename = "00_sample.jpg"
49
-
50
  # Load original image
51
  image = Image.open(image_path).convert("RGB")
52
  width, height = image.size
53
-
54
- # Convert to OpenCV for processing
 
 
55
  img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
56
  gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
57
-
58
- # If only 1 chunk no split needed
59
  if num_chunks <= 1:
60
  return filename, image, [image]
61
-
62
- # Compute approximate uniform splits
63
- approx_points = [int(i * height / num_chunks) for i in range(1, num_chunks)]
64
-
65
- # Adjust each split to nearest low-complexity row
66
- split_points = []
67
- for pt in approx_points:
68
- best = find_low_complexity_row(gray, target_row=pt)
69
- split_points.append(best)
70
-
71
- # Add top and bottom bounds
72
- split_points = [0] + split_points + [height]
73
-
74
  # Produce final chunks
75
  chunks = []
76
- for i in range(num_chunks):
 
 
77
  top = split_points[i]
78
  bottom = split_points[i + 1]
79
  chunk = image.crop((0, top, width, bottom))
80
  chunks.append(chunk)
81
-
 
 
82
  return filename, image, chunks
83
 
84
 
85
- # -----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  # Encode Image to HTML
87
- # -----------------------------
 
88
  def encode_image_to_html(image: Image.Image) -> str:
89
  buffered = BytesIO()
90
  image.save(buffered, format="PNG")
91
  encoded = base64.b64encode(buffered.getvalue()).decode()
92
-
93
  return f"""
94
  <div style="height:500px; overflow-y:auto; border:1px solid #ccc;">
95
  <img src="data:image/png;base64,{encoded}" style="width:100%;" />
96
  </div>
97
- """
 
6
  from io import BytesIO
7
 
8
 
9
+ # ---------------------------------------------------------------------
10
+ # Find solid strips (low complexity horizontal regions)
11
+ # ---------------------------------------------------------------------
12
+
13
def analyze_horizontal_complexity(gray, window_size=5):
    """
    Score how visually "busy" each row of a grayscale image is.

    For every row, a band of rows centred on that row is examined and two
    measurements are combined: the mean Canny edge response of the band and
    the pixel variance of the band divided by 255. Lower scores mark rows
    that are more suitable for splitting.

    Args:
        gray: 2-D grayscale image array (rows x columns).
        window_size: Height of the band analysed around each row. Values
            below 1 are treated as 1, and an even value is widened by one
            row so the band stays centred on the row being scored. The band
            is clipped at the top and bottom image borders.

    Returns:
        1-D float array with one complexity score per image row.
    """
    h, w = gray.shape
    if h == 0 or w == 0:
        # Nothing to analyse; also avoids passing an empty image to Canny
        # and dividing by a zero pixel count below.
        return np.zeros(h, dtype=np.float64)

    # Detect edges once for the whole image; bands are sliced from it.
    edges = cv2.Canny(gray, 80, 160)

    # Number of rows taken on each side of the row being scored.
    half = max(0, int(window_size) // 2)

    scores = np.empty(h, dtype=np.float64)
    for y in range(h):
        y_start = max(0, y - half)
        # Slice ends are exclusive, so "+ 1" is needed to include row
        # y + half. Without it the band is off-centre and, for a
        # window_size of 1, empty (division by zero / NaN scores).
        y_end = min(h, y + half + 1)

        band = gray[y_start:y_end, :]
        edge_band = edges[y_start:y_end, :]

        # Mean edge response per pixel in the band.
        edge_score = np.sum(edge_band) / (w * (y_end - y_start))

        # Intensity variance of the band (texture).
        variance_score = np.var(band)

        scores[y] = edge_score + variance_score / 255.0

    return scores
52
 
 
 
53
 
54
def find_solid_strips(gray, min_strip_height=10, complexity_threshold=0.1):
    """
    Find all solid/low-complexity horizontal strips suitable for splitting.

    Row complexity scores are scaled so the busiest row of the given image
    scores 1.0, then consecutive rows scoring below the threshold are
    grouped into strips.

    Args:
        gray: 2-D grayscale image array (rows x columns).
        min_strip_height: Minimum number of consecutive low-complexity rows
            for a run to count as a strip.
        complexity_threshold: Rows whose scaled score is strictly below
            this value count as low-complexity (lower = stricter).

    Returns:
        List of (start_y, end_y, score) tuples, where end_y is exclusive
        and score is the mean scaled complexity of the strip. The list is
        sorted by score, best (lowest) first. Empty if the image has no
        rows or no run is long enough.
    """
    h = gray.shape[0]
    if h == 0:
        # An empty region has no strips; without this guard the max()
        # below raises on a zero-size array.
        return []

    complexity = analyze_horizontal_complexity(gray)

    # Scale relative to the busiest row; skip when every row scores 0.
    peak = complexity.max()
    if peak > 0:
        complexity = complexity / peak

    is_simple = complexity < complexity_threshold

    # Pad with False on both sides so every run of True has both a rising
    # and a falling edge, including runs touching the first or last row.
    padded = np.concatenate(([False], is_simple, [False]))
    edges = np.flatnonzero(padded[1:] != padded[:-1])
    run_starts = edges[::2]   # first row of each run
    run_ends = edges[1::2]    # one past the last row of each run

    strips = [
        (int(top), int(bottom), float(np.mean(complexity[top:bottom])))
        for top, bottom in zip(run_starts, run_ends)
        if bottom - top >= min_strip_height
    ]

    # Best (least complex) strips first; the sort is stable, so strips
    # with equal scores stay in top-to-bottom order.
    strips.sort(key=lambda strip: strip[2])

    return strips
100
 
 
 
101
 
102
def find_best_split_location(gray, target_row, search_pct=0.2, prefer_solid_strips=True):
    """
    Find the best row near target_row for splitting.

    Rows within search_pct of the image height around target_row are
    considered. When prefer_solid_strips is set and the window contains
    solid strips, the centre of the strip closest to target_row is used;
    otherwise the row with the lowest Canny edge sum in the window is used.

    Args:
        gray: 2-D grayscale image array (rows x columns).
        target_row: Desired split location (row index).
        search_pct: Search radius as a fraction of the image height.
        prefer_solid_strips: If True, try solid strips before falling back
            to the per-row edge sum.

    Returns:
        Row index (int) chosen for the split. If the search window is
        empty, target_row clamped into the image is returned.
    """
    h = gray.shape[0]
    search_radius = int(h * search_pct)

    start = max(0, target_row - search_radius)
    end = min(h - 1, target_row + search_radius)

    if end <= start:
        # Empty window: very small image, zero radius, or a target outside
        # the image. Slicing would be empty (np.argmin raises) or, for a
        # negative end, would silently wrap around.
        return int(min(max(target_row, 0), max(h - 1, 0)))

    if prefer_solid_strips:
        # Strip coordinates are relative to the search region.
        region = gray[start:end, :]
        strips = find_solid_strips(region, min_strip_height=5, complexity_threshold=0.15)

        if strips:
            local_target = target_row - start

            def distance_to_target(strip):
                return abs((strip[0] + strip[1]) // 2 - local_target)

            closest = min(strips, key=distance_to_target)
            # Map the strip centre back to full-image coordinates.
            return int(start + (closest[0] + closest[1]) // 2)

    # Fallback: the row in the window with the least edge response.
    edges = cv2.Canny(gray, 80, 160)
    row_scores = edges[start:end].sum(axis=1)

    return int(start + np.argmin(row_scores))
138
+
139
+
140
def find_optimal_splits(gray, desired_chunks, min_chunk_height=200):
    """
    Find optimal split locations, potentially returning fewer chunks than
    requested if good split points don't exist.

    Ideal split rows are spaced uniformly, then each is moved to the centre
    of the nearest solid strip. A candidate is dropped when it would leave
    a chunk shorter than min_chunk_height above it or below it.

    Args:
        gray: 2-D grayscale image array (rows x columns).
        desired_chunks: Target number of chunks.
        min_chunk_height: Minimum height for each chunk, in rows. Values
            below 1 are treated as 1.

    Returns:
        Ascending list of split rows that starts with 0 and ends with the
        image height; consecutive pairs delimit the chunks.
    """
    h = gray.shape[0]

    # Guard against zero/negative heights, which would otherwise raise
    # ZeroDivisionError or make the spacing checks meaningless.
    min_chunk_height = max(1, int(min_chunk_height))

    # If image too small for desired chunks, reduce.
    max_possible_chunks = max(1, h // min_chunk_height)
    actual_chunks = min(desired_chunks, max_possible_chunks)

    if actual_chunks <= 1:
        if desired_chunks > 1:
            # Only warn when the caller actually asked for a split.
            print(f"⚠️ Image too small for multiple chunks ({h}px height)")
        return [0, h]

    solid_strips = find_solid_strips(gray, min_strip_height=10, complexity_threshold=0.12)

    if not solid_strips:
        print("⚠️ No solid strips found, using uniform splits")
        # Fallback to uniform splits.
        return [int(i * h / actual_chunks) for i in range(actual_chunks + 1)]

    print(f"✓ Found {len(solid_strips)} solid strips")

    # Strips arrive sorted best-first, so on a distance tie min() below
    # keeps the centre of the better-scoring strip.
    strip_centers = [(strip[0] + strip[1]) // 2 for strip in solid_strips]

    ideal_splits = [int(i * h / actual_chunks) for i in range(1, actual_chunks)]

    actual_splits = [0]  # Start

    for target in ideal_splits:
        split_y = int(min(strip_centers, key=lambda center: abs(center - target)))

        if split_y - actual_splits[-1] < min_chunk_height:
            print(f"⚠️ Skipping split at {split_y} (too close to previous)")
        elif h - split_y < min_chunk_height:
            # Without this check the final chunk could end up shorter
            # than min_chunk_height.
            print(f"⚠️ Skipping split at {split_y} (too close to bottom edge)")
        else:
            actual_splits.append(split_y)

    actual_splits.append(h)  # End

    num_resulting_chunks = len(actual_splits) - 1
    if num_resulting_chunks < desired_chunks:
        print(f"ℹ️ Returning {num_resulting_chunks} chunks (requested {desired_chunks}, but not enough good split points)")

    return actual_splits
198
 
 
199
 
200
+ # ---------------------------------------------------------------------
201
+ # Load & Split Image (Enhanced)
202
+ # ---------------------------------------------------------------------
203
 
204
+ def load_and_split_image(file_obj, num_chunks, min_chunk_height=200, allow_fewer_chunks=True):
 
 
 
205
  """
206
+ Loads an image and splits it intelligently across solid strips.
207
+ Can return fewer chunks than requested if good split points don't exist.
208
+
209
+ Args:
210
+ file_obj: File object or path
211
+ num_chunks: Desired number of chunks
212
+ min_chunk_height: Minimum height per chunk (pixels)
213
+ allow_fewer_chunks: If True, can return < num_chunks
214
+
215
+ Returns:
216
+ (filename, original_image, list_of_chunks)
217
  """
218
  if file_obj is not None:
219
  image_path = file_obj.name if hasattr(file_obj, "name") else file_obj
 
221
  else:
222
  image_path = "00_sample.jpg"
223
  filename = "00_sample.jpg"
224
+
225
  # Load original image
226
  image = Image.open(image_path).convert("RGB")
227
  width, height = image.size
228
+
229
+ print(f"📏 Image size: {width}x{height}")
230
+
231
+ # Convert to OpenCV for analysis
232
  img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
233
  gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
234
+
235
+ # If only 1 chunk requested, no split needed
236
  if num_chunks <= 1:
237
  return filename, image, [image]
238
+
239
+ # Find optimal split locations
240
+ if allow_fewer_chunks:
241
+ split_points = find_optimal_splits(gray, num_chunks, min_chunk_height)
242
+ else:
243
+ # Old behavior: always return exact number of chunks
244
+ approx_points = [int(i * height / num_chunks) for i in range(1, num_chunks)]
245
+ split_points = [0]
246
+ for pt in approx_points:
247
+ best = find_best_split_location(gray, target_row=pt, prefer_solid_strips=True)
248
+ split_points.append(best)
249
+ split_points.append(height)
250
+
251
  # Produce final chunks
252
  chunks = []
253
+ num_actual_chunks = len(split_points) - 1
254
+
255
+ for i in range(num_actual_chunks):
256
  top = split_points[i]
257
  bottom = split_points[i + 1]
258
  chunk = image.crop((0, top, width, bottom))
259
  chunks.append(chunk)
260
+ print(f" Chunk {i+1}: rows {top}-{bottom} (height: {bottom-top}px)")
261
+
262
+ print(f"✅ Split into {len(chunks)} chunks")
263
  return filename, image, chunks
264
 
265
 
266
+ # ---------------------------------------------------------------------
267
+ # Visualization Helper
268
+ # ---------------------------------------------------------------------
269
+
270
def visualize_split_analysis(gray, split_points):
    """
    Create a visualization showing complexity analysis and split points.
    Useful for debugging split decisions.

    The grayscale image is converted to BGR, a horizontal green line is
    drawn at every interior split row, and a 50-pixel-wide per-row
    complexity heatmap is attached to its right edge.

    Args:
        gray: 2-D grayscale image array (rows x columns).
        split_points: Split rows including the first and last boundary;
            only the interior entries are drawn.

    Returns:
        BGR image array of shape (rows, columns + 50, 3).
    """
    h, w = gray.shape

    complexity = analyze_horizontal_complexity(gray)

    vis = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)

    heatmap_width = 50

    # Scale scores to 0..255. A completely uniform image scores 0 on every
    # row; dividing by that maximum would produce NaN, so map it to 0.
    peak = complexity.max() if complexity.size else 0
    if peak > 0:
        level = (complexity / peak * 255).astype(np.uint8)
    else:
        level = np.zeros(h, dtype=np.uint8)

    # BGR channels: green fades out and red fades in as complexity rises
    # (Green=low, Red=high). Each row's colour is repeated across the width.
    heatmap = np.zeros((h, heatmap_width, 3), dtype=np.uint8)
    heatmap[:, :, 1] = (255 - level)[:, np.newaxis]
    heatmap[:, :, 2] = level[:, np.newaxis]

    # Draw split lines, skipping the first and last boundary.
    for split_y in split_points[1:-1]:
        # Plain int in case the split rows are NumPy integers, which
        # cv2.line may not accept as point coordinates.
        row = int(split_y)
        cv2.line(vis, (0, row), (w, row), (0, 255, 0), 2)

    return np.hstack([vis, heatmap])
300
+
301
+
302
+ # ---------------------------------------------------------------------
303
  # Encode Image to HTML
304
+ # ---------------------------------------------------------------------
305
+
306
  def encode_image_to_html(image: Image.Image) -> str:
307
  buffered = BytesIO()
308
  image.save(buffered, format="PNG")
309
  encoded = base64.b64encode(buffered.getvalue()).decode()
310
+
311
  return f"""
312
  <div style="height:500px; overflow-y:auto; border:1px solid #ccc;">
313
  <img src="data:image/png;base64,{encoded}" style="width:100%;" />
314
  </div>
315
+ """