kambris committed on
Commit
3b6cab5
·
verified ·
1 Parent(s): 4cc7b44

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -37
app.py CHANGED
@@ -33,9 +33,9 @@ def find_text_lines_voynich(img_pil):
33
  thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
34
  cv2.THRESH_BINARY_INV, 11, 2)
35
 
36
- # Create a small horizontal kernel to connect characters within words
37
- # But keep it small to avoid connecting different lines
38
- kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 1))
39
  connected = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
40
 
41
  # Find contours
@@ -57,16 +57,13 @@ def find_text_lines_voynich(img_pil):
57
 
58
  print(f"Contour {i}: pos=({x},{y}), size=({w},{h}), ratio={aspect_ratio:.1f}, w%={width_percent:.1f}, h%={height_percent:.1f}")
59
 
60
- # Criteria for Voynich text lines:
61
- # - Should be reasonably wide (at least 15% of width)
62
- # - Should not be too tall (text lines are horizontal)
63
- # - Should have good aspect ratio (wider than tall)
64
- # - Should not be tiny (at least 10 pixels high for readability)
65
  if (w >= search_width * 0.15 and # Minimum width
66
- h >= 10 and # Minimum height
67
- h <= search_height * 0.05 and # Maximum height (5% of search area)
68
- aspect_ratio >= 3.0 and # Should be wide
69
- width_percent <= 90): # Not the entire width (avoid page edges)
 
70
 
71
  text_contours.append((contour, x, y + skip_top, w, h)) # Add skip_top back to y
72
  print(f" ✓ ACCEPTED as text line")
@@ -84,9 +81,9 @@ def find_text_lines_voynich(img_pil):
84
 
85
  print(f"Extracting text line at: x={x}, y={y}, w={w}, h={h}")
86
 
87
- # Extract with generous margins
88
- margin_x = 30
89
- margin_y = 20
90
  y_start = max(0, y - margin_y)
91
  y_end = min(img_height, y + h + margin_y)
92
  x_start = max(0, x - margin_x)
@@ -103,7 +100,7 @@ def find_text_lines_voynich(img_pil):
103
  return scan_for_text_lines(img, skip_top)
104
 
105
  def scan_for_text_lines(img, start_y):
106
- """Scan line by line looking for text content"""
107
  if len(img.shape) == 3:
108
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
109
  else:
@@ -112,9 +109,9 @@ def scan_for_text_lines(img, start_y):
112
  img_height, img_width = gray.shape
113
 
114
  # Scan from start_y downward
115
- for y in range(start_y, img_height - 40, 10): # Check every 10 pixels
116
- # Take a 40-pixel high strip
117
- strip = gray[y:y+40, :]
118
 
119
  # Apply threshold
120
  _, thresh = cv2.threshold(strip, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
@@ -131,11 +128,11 @@ def scan_for_text_lines(img, start_y):
131
 
132
  print(f"y={y}: ink_ratio={ink_ratio:.3f}, rows_with_ink={rows_with_ink}")
133
 
134
- # If we find a region with reasonable ink and horizontal distribution
135
- if ink_ratio > 0.02 and rows_with_ink >= 5: # At least 5 rows with ink
136
- # Expand the region
137
- y_start = max(0, y - 15)
138
- y_end = min(img_height, y + 55)
139
 
140
  if len(img.shape) == 3:
141
  extracted = img[y_start:y_end, :]
@@ -145,10 +142,10 @@ def scan_for_text_lines(img, start_y):
145
  print(f"Found text at y={y}, extracting region {y_start}:{y_end}")
146
  return Image.fromarray(extracted)
147
 
148
- # If still nothing found, return a middle section
149
- print("No text found, returning middle section")
150
  mid_y = img_height // 2
151
- section = img[mid_y:mid_y + img_height//4, :]
152
  return Image.fromarray(section)
153
 
154
  def preprocess_voynich_image(img_pil):
@@ -228,26 +225,106 @@ def extract_text_block(img_pil, start_percent=0.2, height_percent=0.4):
228
  block = img[start_y:end_y, :]
229
  return Image.fromarray(block)
230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  # Enhanced Gradio interface
232
- with gr.Blocks(title="Voynich Text Line Extractor - Fixed") as demo:
233
- gr.Markdown("# Voynich Text Line Extractor - Fixed Version")
234
- gr.Markdown("This version specifically looks for actual text lines in the manuscript, not page edges. It skips the top portion and searches in the text areas.")
235
 
236
  with gr.Row():
237
  with gr.Column():
238
  input_image = gr.Image(type="pil", label="Upload Voynich Folio")
239
- enhance_btn = gr.Button("Enhance Image")
240
- extract_btn = gr.Button("Find Text Lines")
241
- block_btn = gr.Button("Extract Text Block")
242
- debug_btn = gr.Button("Debug Detection")
243
 
244
- # Add slider for text block extraction
245
- start_slider = gr.Slider(0.1, 0.8, 0.2, label="Start Position (% from top)")
246
- height_slider = gr.Slider(0.1, 0.6, 0.4, label="Block Height (% of image)")
 
 
 
 
 
 
247
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  with gr.Column():
249
  enhanced_output = gr.Image(label="Enhanced Image")
250
  line_output = gr.Image(label="Extracted Text")
 
251
 
252
  with gr.Row():
253
  debug_search = gr.Image(label="1. Search Area")
@@ -255,6 +332,7 @@ with gr.Blocks(title="Voynich Text Line Extractor - Fixed") as demo:
255
  debug_thresh = gr.Image(label="3. Threshold")
256
  debug_result = gr.Image(label="4. Result")
257
 
 
258
  enhance_btn.click(
259
  fn=preprocess_voynich_image,
260
  inputs=input_image,
@@ -278,6 +356,27 @@ with gr.Blocks(title="Voynich Text Line Extractor - Fixed") as demo:
278
  inputs=input_image,
279
  outputs=[debug_search, debug_enhanced, debug_thresh, debug_result]
280
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
 
282
  if __name__ == "__main__":
283
  demo.launch()
 
33
  thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
34
  cv2.THRESH_BINARY_INV, 11, 2)
35
 
36
+ # Create a SMALLER horizontal kernel to connect characters within words
37
+ # Keep it smaller to avoid connecting different lines
38
+ kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 1)) # Reduced from (8, 1)
39
  connected = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
40
 
41
  # Find contours
 
57
 
58
  print(f"Contour {i}: pos=({x},{y}), size=({w},{h}), ratio={aspect_ratio:.1f}, w%={width_percent:.1f}, h%={height_percent:.1f}")
59
 
60
+ # MORE RESTRICTIVE criteria for single text lines:
 
 
 
 
61
  if (w >= search_width * 0.15 and # Minimum width
62
+ h >= 8 and # Minimum height (reduced from 10)
63
+ h <= search_height * 0.03 and # SMALLER maximum height (reduced from 0.05 to 0.03)
64
+ aspect_ratio >= 5.0 and # HIGHER aspect ratio (increased from 3.0 to 5.0)
65
+ width_percent <= 85 and # Tighter width limit (reduced from 90 to 85)
66
+ height_percent <= 3.0): # Additional height percentage limit
67
 
68
  text_contours.append((contour, x, y + skip_top, w, h)) # Add skip_top back to y
69
  print(f" ✓ ACCEPTED as text line")
 
81
 
82
  print(f"Extracting text line at: x={x}, y={y}, w={w}, h={h}")
83
 
84
+ # Extract with SMALLER margins to get tighter crop
85
+ margin_x = 15 # Reduced from 30
86
+ margin_y = 10 # Reduced from 20
87
  y_start = max(0, y - margin_y)
88
  y_end = min(img_height, y + h + margin_y)
89
  x_start = max(0, x - margin_x)
 
100
  return scan_for_text_lines(img, skip_top)
101
 
102
  def scan_for_text_lines(img, start_y):
103
+ """Scan line by line looking for text content - modified for single lines"""
104
  if len(img.shape) == 3:
105
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
106
  else:
 
109
  img_height, img_width = gray.shape
110
 
111
  # Scan from start_y downward
112
+ for y in range(start_y, img_height - 25, 5): # Smaller strip, check every 5 pixels
113
+ # Take a SMALLER strip (25 pixels instead of 40)
114
+ strip = gray[y:y+25, :]
115
 
116
  # Apply threshold
117
  _, thresh = cv2.threshold(strip, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
 
128
 
129
  print(f"y={y}: ink_ratio={ink_ratio:.3f}, rows_with_ink={rows_with_ink}")
130
 
131
+ # More restrictive criteria for single lines
132
+ if ink_ratio > 0.02 and ink_ratio < 0.15 and rows_with_ink >= 3 and rows_with_ink <= 15:
133
+ # Expand the region but keep it smaller
134
+ y_start = max(0, y - 8) # Reduced margin
135
+ y_end = min(img_height, y + 33) # Smaller total height
136
 
137
  if len(img.shape) == 3:
138
  extracted = img[y_start:y_end, :]
 
142
  print(f"Found text at y={y}, extracting region {y_start}:{y_end}")
143
  return Image.fromarray(extracted)
144
 
145
+ # If still nothing found, return a smaller middle section
146
+ print("No text found, returning smaller middle section")
147
  mid_y = img_height // 2
148
+ section = img[mid_y:mid_y + img_height//8, :] # Smaller section (1/8 instead of 1/4)
149
  return Image.fromarray(section)
150
 
151
  def preprocess_voynich_image(img_pil):
 
225
  block = img[start_y:end_y, :]
226
  return Image.fromarray(block)
227
 
228
def _rectangle_bounds(img_width, img_height, x_start_percent, y_start_percent,
                      width_percent, height_percent):
    """Convert fractional rectangle settings into clamped pixel bounds.

    The four ``*_percent`` arguments are fractions of the image size
    (0.0-1.0), not values out of 100.

    Returns:
        ``(x_start, y_start, x_end, y_end)``. The end coordinates are
        exclusive, so the region is ``img[y_start:y_end, x_start:x_end]``.
    """
    # Convert fractions to pixel coordinates (truncating toward zero).
    x_start = int(img_width * x_start_percent)
    y_start = int(img_height * y_start_percent)
    width = int(img_width * width_percent)
    height = int(img_height * height_percent)

    # Keep the origin inside the image and cut the far edge at the border.
    x_start = max(0, min(x_start, img_width - 1))
    y_start = max(0, min(y_start, img_height - 1))
    x_end = min(img_width, x_start + width)
    y_end = min(img_height, y_start + height)
    return x_start, y_start, x_end, y_end


def manual_extract_rectangle(img_pil, x_start_percent=0.0, y_start_percent=0.2,
                             width_percent=1.0, height_percent=0.15):
    """Manually extract a rectangular region from the image.

    Args:
        img_pil: Image to crop (anything ``np.array`` accepts), or ``None``.
        x_start_percent: Left edge as a fraction of the image width.
        y_start_percent: Top edge as a fraction of the image height.
        width_percent: Rectangle width as a fraction of the image width.
        height_percent: Rectangle height as a fraction of the image height.

    Returns:
        The cropped region as a PIL image, or ``None`` when no image was
        supplied or the clamped rectangle is empty.
    """
    if img_pil is None:
        return None

    img = np.array(img_pil)
    img_height, img_width = img.shape[:2]
    x_start, y_start, x_end, y_end = _rectangle_bounds(
        img_width, img_height,
        x_start_percent, y_start_percent, width_percent, height_percent,
    )

    rectangle = img[y_start:y_end, x_start:x_end]
    print(f"Manual extract: x={x_start}:{x_end}, y={y_start}:{y_end}, size={rectangle.shape}")

    # A zero-area crop cannot be turned into an image.
    if rectangle.size == 0:
        return None
    return Image.fromarray(rectangle)


def show_rectangle_preview(img_pil, x_start_percent=0.0, y_start_percent=0.2,
                           width_percent=1.0, height_percent=0.15):
    """Show a preview of the rectangle that will be extracted.

    The highlighted area uses the same bounds computation as
    ``manual_extract_rectangle`` so the preview matches the extraction.

    Args:
        img_pil: Image to annotate (anything ``np.array`` accepts), or ``None``.
        x_start_percent: Left edge as a fraction of the image width.
        y_start_percent: Top edge as a fraction of the image height.
        width_percent: Rectangle width as a fraction of the image width.
        height_percent: Rectangle height as a fraction of the image height.

    Returns:
        A PIL image with the selection outlined in red and tinted green, or
        ``None`` when no image was supplied.
    """
    if img_pil is None:
        return None

    img = np.array(img_pil)

    # The outline/tint colours are RGB triples; draw on a 3-channel copy so
    # they also render on grayscale input and do not zero the alpha channel
    # of RGBA input.
    if img.ndim == 2:
        preview = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    elif img.shape[2] == 4:
        preview = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
    else:
        preview = np.copy(img)

    img_height, img_width = preview.shape[:2]
    x_start, y_start, x_end, y_end = _rectangle_bounds(
        img_width, img_height,
        x_start_percent, y_start_percent, width_percent, height_percent,
    )

    # Nothing would be extracted, so there is nothing to highlight.
    if x_end <= x_start or y_end <= y_start:
        return Image.fromarray(preview)

    # cv2.rectangle treats both corners as inclusive, whereas the extraction
    # slice end is exclusive; step back one pixel so the two regions agree.
    top_left = (x_start, y_start)
    bottom_right = (x_end - 1, y_end - 1)

    # Draw rectangle outline
    cv2.rectangle(preview, top_left, bottom_right, (255, 0, 0), 2)

    # Semi-transparent overlay to show the selected area
    overlay = np.copy(preview)
    cv2.rectangle(overlay, top_left, bottom_right, (0, 255, 0), -1)
    preview = cv2.addWeighted(preview, 0.8, overlay, 0.2, 0)

    return Image.fromarray(preview)
292
+
293
  # Enhanced Gradio interface
294
+ with gr.Blocks(title="Voynich Text Line Extractor - Single Line Focus") as demo:
295
+ gr.Markdown("# Voynich Text Line Extractor - Single Line Focus")
296
+ gr.Markdown("This version is optimized to extract single text lines with tighter bounding boxes.")
297
 
298
  with gr.Row():
299
  with gr.Column():
300
  input_image = gr.Image(type="pil", label="Upload Voynich Folio")
 
 
 
 
301
 
302
+ with gr.Tab("Auto Extract"):
303
+ enhance_btn = gr.Button("Enhance Image")
304
+ extract_btn = gr.Button("Find Text Lines")
305
+ block_btn = gr.Button("Extract Text Block")
306
+ debug_btn = gr.Button("Debug Detection")
307
+
308
+ # Add slider for text block extraction
309
+ start_slider = gr.Slider(0.1, 0.8, 0.2, label="Start Position (% from top)")
310
+ height_slider = gr.Slider(0.1, 0.6, 0.4, label="Block Height (% of image)")
311
 
312
+ with gr.Tab("Manual Rectangle"):
313
+ gr.Markdown("### Manual Rectangle Selection")
314
+ gr.Markdown("Adjust the sliders to manually select a rectangular region")
315
+
316
+ x_start_slider = gr.Slider(0.0, 0.9, 0.0, step=0.01, label="X Start (% from left)")
317
+ y_start_slider = gr.Slider(0.0, 0.9, 0.2, step=0.01, label="Y Start (% from top)")
318
+ width_slider = gr.Slider(0.1, 1.0, 1.0, step=0.01, label="Width (% of image)")
319
+ height_slider_manual = gr.Slider(0.05, 0.5, 0.15, step=0.01, label="Height (% of image)")
320
+
321
+ preview_btn = gr.Button("Preview Rectangle")
322
+ extract_manual_btn = gr.Button("Extract Rectangle")
323
+
324
  with gr.Column():
325
  enhanced_output = gr.Image(label="Enhanced Image")
326
  line_output = gr.Image(label="Extracted Text")
327
+ preview_output = gr.Image(label="Rectangle Preview")
328
 
329
  with gr.Row():
330
  debug_search = gr.Image(label="1. Search Area")
 
332
  debug_thresh = gr.Image(label="3. Threshold")
333
  debug_result = gr.Image(label="4. Result")
334
 
335
+ # Auto extract button handlers
336
  enhance_btn.click(
337
  fn=preprocess_voynich_image,
338
  inputs=input_image,
 
356
  inputs=input_image,
357
  outputs=[debug_search, debug_enhanced, debug_thresh, debug_result]
358
  )
359
+
360
+ # Manual rectangle handlers
361
+ preview_btn.click(
362
+ fn=show_rectangle_preview,
363
+ inputs=[input_image, x_start_slider, y_start_slider, width_slider, height_slider_manual],
364
+ outputs=preview_output
365
+ )
366
+
367
+ extract_manual_btn.click(
368
+ fn=manual_extract_rectangle,
369
+ inputs=[input_image, x_start_slider, y_start_slider, width_slider, height_slider_manual],
370
+ outputs=line_output
371
+ )
372
+
373
+ # Auto-update preview when sliders change
374
+ for slider in [x_start_slider, y_start_slider, width_slider, height_slider_manual]:
375
+ slider.change(
376
+ fn=show_rectangle_preview,
377
+ inputs=[input_image, x_start_slider, y_start_slider, width_slider, height_slider_manual],
378
+ outputs=preview_output
379
+ )
380
 
381
  if __name__ == "__main__":
382
  demo.launch()