import gradio as gr
import cv2
import numpy as np
from PIL import Image
def find_text_lines_voynich(img_pil):
    """Find the topmost single text line on a folio image and return it cropped.

    The top 15% of the page is skipped, the remainder is contrast-enhanced
    with CLAHE, adaptively thresholded (ink becomes white) and closed with a
    5x1 horizontal kernel so neighbouring strokes merge sideways.  External
    contours whose bounding box is wide and thin enough to be one line are
    kept; the one highest on the page is cropped, with a small margin, from
    the original (unprocessed) image.

    Args:
        img_pil: Image to analyse, or None.  It is converted with
            ``np.array``; the Gradio UI in this file passes a PIL image.

    Returns:
        A PIL image of the cropped line; if no contour qualifies, whatever
        ``scan_for_text_lines`` returns; ``None`` when ``img_pil`` is None.
    """
    if img_pil is None:
        return None

    # Work on a NumPy array; derive a single-channel view for analysis.
    img = np.array(img_pil)
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img

    img_height, img_width = gray.shape
    print(f"Processing image: {img_width}x{img_height}")

    # Skip the top portion where page edges and headers might be.
    skip_top = int(img_height * 0.15)
    search_area = gray[skip_top:, :]
    print(f"Searching in area starting from y={skip_top}")

    # Enhance contrast for faded manuscript text.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(search_area)

    # Adaptive thresholding copes with uneven background better than a
    # single global threshold.
    thresh = cv2.adaptiveThreshold(
        enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2,
    )

    # Small horizontal kernel: joins strokes sideways while being only one
    # pixel tall, so separate lines are not bridged vertically.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 1))
    connected = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

    contours, _ = cv2.findContours(
        connected, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    print(f"Found {len(contours)} contours in search area")

    # Bounding boxes (x, y, w, h) of accepted candidates, y in full-image
    # coordinates.
    text_lines = []
    search_height, search_width = search_area.shape

    for i, contour in enumerate(contours):
        x, y, w, h = cv2.boundingRect(contour)

        aspect_ratio = w / h if h > 0 else 0
        width_percent = (w / search_width) * 100
        height_percent = (h / search_height) * 100

        print(f"Contour {i}: pos=({x},{y}), size=({w},{h}), ratio={aspect_ratio:.1f}, w%={width_percent:.1f}, h%={height_percent:.1f}")

        # Restrictive criteria for a single text line.
        looks_like_line = (
            w >= search_width * 0.15            # minimum width
            and h >= 8                          # minimum height in pixels
            and h <= search_height * 0.03       # maximum height
            and aspect_ratio >= 5.0             # clearly wider than tall
            and width_percent <= 85             # not spanning the whole page
            and height_percent <= 3.0           # height limit as a percentage
        )
        if looks_like_line:
            # Add skip_top back so y refers to the full image.
            text_lines.append((x, y + skip_top, w, h))
            print(" ✓ ACCEPTED as text line")
        else:
            print(" ✗ REJECTED")

    print(f"Found {len(text_lines)} potential text lines")

    if text_lines:
        # Topmost candidate; on equal y the first one found wins.
        x, y, w, h = min(text_lines, key=lambda box: box[1])
        print(f"Extracting text line at: x={x}, y={y}, w={w}, h={h}")

        # Small margins keep the crop tight around the line.
        margin_x = 15
        margin_y = 10

        y_start = max(0, y - margin_y)
        y_end = min(img_height, y + h + margin_y)
        x_start = max(0, x - margin_x)
        x_end = min(img_width, x + w + margin_x)

        extracted = img[y_start:y_end, x_start:x_end]
        if extracted.size > 0:
            print(f"Successfully extracted line: {extracted.shape}")
            return Image.fromarray(extracted)

    # Fallback: strip-by-strip scan of the area below skip_top.
    print("No contours found, trying line-by-line scan...")
    return scan_for_text_lines(img, skip_top)
def scan_for_text_lines(img, start_y):
    """Scan downward in thin horizontal strips and crop the first text-like one.

    Starting at ``start_y`` a 25-pixel-high strip is examined every 5 pixels.
    Each strip is binarised with Otsu's method (ink becomes white); a strip
    counts as text when the share of ink pixels and the number of rows
    containing ink both fall inside fixed ranges.

    Args:
        img: Image as a NumPy array, either 2-D or 3-D (3-D input is
            converted with ``COLOR_RGB2GRAY`` for analysis).
        start_y: Row at which scanning starts.

    Returns:
        A PIL image of the full-width band around the first matching strip.
        If no strip matches, a band starting at mid-height that is one
        eighth of the image tall (at least one row).
    """
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img

    img_height = gray.shape[0]
    strip_height = 25
    step = 5

    for y in range(start_y, img_height - strip_height, step):
        strip = gray[y:y + strip_height, :]

        _, thresh = cv2.threshold(
            strip, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
        )

        # Share of the strip covered by ink.
        ink_pixels = np.sum(thresh > 0)
        total_pixels = strip.shape[0] * strip.shape[1]
        ink_ratio = ink_pixels / total_pixels

        # Number of rows containing any ink: bounds the vertical extent.
        row_sums = np.sum(thresh, axis=1)
        rows_with_ink = np.sum(row_sums > 0)

        print(f"y={y}: ink_ratio={ink_ratio:.3f}, rows_with_ink={rows_with_ink}")

        if 0.02 < ink_ratio < 0.15 and 3 <= rows_with_ink <= 15:
            # Pad the strip by 8 rows on each side, clamped to the image.
            y_start = max(0, y - 8)
            y_end = min(img_height, y + 33)

            # `gray` is `img` itself for 2-D input, so slicing `img` covers
            # both the colour and the grayscale case.
            extracted = img[y_start:y_end, :]

            print(f"Found text at y={y}, extracting region {y_start}:{y_end}")
            return Image.fromarray(extracted)

    # Nothing matched: return a band from the middle of the image.
    print("No text found, returning smaller middle section")
    mid_y = img_height // 2
    # At least one row, so images shorter than 8 px do not yield an empty
    # slice that cannot be turned into an image.
    section_height = max(1, img_height // 8)
    section = img[mid_y:mid_y + section_height, :]
    return Image.fromarray(section)
def preprocess_voynich_image(img_pil):
    """Boost local contrast of a folio image with CLAHE.

    For 3-D (colour) input the image is converted to LAB, CLAHE is applied
    to the L channel only, and the result is converted back to RGB.  For
    2-D (grayscale) input CLAHE is applied to the single channel directly,
    so this function accepts the same inputs as the other helpers in this
    file.

    Args:
        img_pil: Image to enhance, or None.  It is converted with
            ``np.array``; the Gradio UI in this file passes a PIL image.

    Returns:
        The enhanced image as a PIL image, or ``None`` when ``img_pil`` is
        None.
    """
    if img_pil is None:
        return None

    img = np.array(img_pil)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))

    if len(img.shape) != 3:
        # Grayscale: there are no colour channels to preserve.
        # NOTE(review): assumes an 8- or 16-bit array here — confirm if other
        # PIL modes (e.g. "1", "I", "F") can reach this function.
        return Image.fromarray(clahe.apply(img))

    # Equalise lightness only; the a/b channels are passed through untouched.
    lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    lightness, chan_a, chan_b = cv2.split(lab)
    lightness = clahe.apply(lightness)

    enhanced = cv2.merge([lightness, chan_a, chan_b])
    enhanced = cv2.cvtColor(enhanced, cv2.COLOR_LAB2RGB)
    return Image.fromarray(enhanced)
def debug_voynich_detection(img_pil):
    """Produce intermediate images illustrating the text-line detection.

    Args:
        img_pil: Image to analyse, or None.  It is converted with
            ``np.array``; the Gradio UI in this file passes a PIL image.

    Returns:
        A 4-tuple ``(search_viz, enhanced, thresh_full, result)``:
        the grayscale page with the search area outlined, the CLAHE-enhanced
        search area, the adaptive threshold placed back at its position in a
        page-sized image, and the output of ``find_text_lines_voynich``.
        All four entries are ``None`` when ``img_pil`` is None.
    """
    if img_pil is None:
        return None, None, None, None

    img = np.array(img_pil)
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img

    img_height, img_width = gray.shape

    # Use the same 15% top skip as find_text_lines_voynich, so the panels
    # show the region the detector actually processes.
    skip_top = int(img_height * 0.15)
    search_area = gray[skip_top:, :]

    # Outline the search area on a copy of the grayscale page.
    search_viz = np.copy(gray)
    cv2.rectangle(search_viz, (0, skip_top), (img_width, img_height), (128), 2)

    # Same enhancement and threshold settings as the detector.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(search_area)

    thresh = cv2.adaptiveThreshold(
        enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2,
    )

    # Place the threshold back at its original position in a page-sized
    # image; the skipped top band stays black.
    thresh_full = np.zeros_like(gray)
    thresh_full[skip_top:, :] = thresh

    # Final result from the real detector.
    result = find_text_lines_voynich(img_pil)

    return (
        Image.fromarray(search_viz),
        Image.fromarray(enhanced),
        Image.fromarray(thresh_full),
        result,
    )
def extract_text_block(img_pil, start_percent=0.2, height_percent=0.4):
    """Crop a full-width horizontal band from the image.

    Args:
        img_pil: Image to crop, or None.  It is converted with ``np.array``.
        start_percent: Top of the band as a fraction of the image height.
        height_percent: Height of the band as a fraction of the image height.

    Returns:
        The band as a PIL image, or ``None`` when ``img_pil`` is None or the
        requested band contains no pixels.
    """
    if img_pil is None:
        return None

    img = np.array(img_pil)
    img_height = img.shape[0]

    start_y = int(img_height * start_percent)
    block_height = int(img_height * height_percent)
    end_y = min(img_height, start_y + block_height)

    block = img[start_y:end_y, :]

    # An empty slice (tiny image, or a start at/after the bottom edge)
    # cannot be turned into an image; report "nothing extracted" the same
    # way manual_extract_rectangle does.
    if block.size == 0:
        return None
    return Image.fromarray(block)
def manual_extract_rectangle(img_pil, x_start_percent=0.0, y_start_percent=0.2,
                             width_percent=1.0, height_percent=0.15):
    """Crop a rectangle given as fractions of the image size.

    Args:
        img_pil: Image to crop, or None.  It is converted with ``np.array``.
        x_start_percent: Left edge as a fraction of the image width.
        y_start_percent: Top edge as a fraction of the image height.
        width_percent: Rectangle width as a fraction of the image width.
        height_percent: Rectangle height as a fraction of the image height.

    Returns:
        The cropped region as a PIL image, or ``None`` when ``img_pil`` is
        None or the rectangle contains no pixels.
    """
    if img_pil is None:
        return None

    pixels = np.array(img_pil)
    rows, cols = pixels.shape[:2]

    # Fractions -> pixels; the origin is clamped inside the image and the
    # far edges are clamped to the image size.
    left = max(0, min(int(cols * x_start_percent), cols - 1))
    top = max(0, min(int(rows * y_start_percent), rows - 1))
    right = min(cols, left + int(cols * width_percent))
    bottom = min(rows, top + int(rows * height_percent))

    crop = pixels[top:bottom, left:right]
    print(f"Manual extract: x={left}:{right}, y={top}:{bottom}, size={crop.shape}")

    return Image.fromarray(crop) if crop.size > 0 else None
def show_rectangle_preview(img_pil, x_start_percent=0.0, y_start_percent=0.2,
                           width_percent=1.0, height_percent=0.15):
    """Draw the rectangle that ``manual_extract_rectangle`` would crop.

    The rectangle is computed from the same fractional parameters and with
    the same clamping as ``manual_extract_rectangle``.  It is drawn as a
    2-pixel outline plus a 20%-opacity fill on a copy of the image.

    Args:
        img_pil: Image to annotate, or None.  It is converted with
            ``np.array``.
        x_start_percent: Left edge as a fraction of the image width.
        y_start_percent: Top edge as a fraction of the image height.
        width_percent: Rectangle width as a fraction of the image width.
        height_percent: Rectangle height as a fraction of the image height.

    Returns:
        The annotated copy as a PIL image (unannotated when the selection is
        empty), or ``None`` when ``img_pil`` is None.
    """
    if img_pil is None:
        return None

    img = np.array(img_pil)
    img_height, img_width = img.shape[:2]

    # Fractions -> pixels, clamped to the image bounds.
    x_start = max(0, min(int(img_width * x_start_percent), img_width - 1))
    y_start = max(0, min(int(img_height * y_start_percent), img_height - 1))
    x_end = min(img_width, x_start + int(img_width * width_percent))
    y_end = min(img_height, y_start + int(img_height * height_percent))

    preview = np.copy(img)

    # Empty selection: nothing would be extracted, so nothing is drawn.
    if x_end <= x_start or y_end <= y_start:
        return Image.fromarray(preview)

    # x_end / y_end are exclusive slice bounds, whereas cv2.rectangle draws
    # both corners inclusively; subtract one so the preview covers exactly
    # the pixels that would be extracted.
    top_left = (x_start, y_start)
    bottom_right = (x_end - 1, y_end - 1)

    # Outline first, then blend a filled copy on top for a translucent tint.
    cv2.rectangle(preview, top_left, bottom_right, (255, 0, 0), 2)
    overlay = np.copy(preview)
    cv2.rectangle(overlay, top_left, bottom_right, (0, 255, 0), -1)
    preview = cv2.addWeighted(preview, 0.8, overlay, 0.2, 0)

    return Image.fromarray(preview)
# Gradio UI: one input image, an "Auto Extract" tab, a "Manual Rectangle"
# tab, three output images and a row of four debug images.
# NOTE(review): the container nesting below was reconstructed from a source
# whose indentation had been lost — confirm it matches the intended layout.
with gr.Blocks(title="Voynich Text Line Extractor - Single Line Focus") as demo:
    gr.Markdown("# Voynich Text Line Extractor - Single Line Focus")
    gr.Markdown("This version is optimized to extract single text lines with tighter bounding boxes.")

    with gr.Row():
        # Input image and controls.
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload Voynich Folio")

            with gr.Tab("Auto Extract"):
                enhance_btn = gr.Button("Enhance Image")
                extract_btn = gr.Button("Find Text Lines")
                block_btn = gr.Button("Extract Text Block")
                debug_btn = gr.Button("Debug Detection")

                # Parameters for "Extract Text Block".
                start_slider = gr.Slider(
                    minimum=0.1, maximum=0.8, value=0.2,
                    label="Start Position (% from top)",
                )
                height_slider = gr.Slider(
                    minimum=0.1, maximum=0.6, value=0.4,
                    label="Block Height (% of image)",
                )

            with gr.Tab("Manual Rectangle"):
                gr.Markdown("### Manual Rectangle Selection")
                gr.Markdown("Adjust the sliders to manually select a rectangular region")

                x_start_slider = gr.Slider(
                    minimum=0.0, maximum=0.9, value=0.0, step=0.01,
                    label="X Start (% from left)",
                )
                y_start_slider = gr.Slider(
                    minimum=0.0, maximum=0.9, value=0.2, step=0.01,
                    label="Y Start (% from top)",
                )
                width_slider = gr.Slider(
                    minimum=0.1, maximum=1.0, value=1.0, step=0.01,
                    label="Width (% of image)",
                )
                height_slider_manual = gr.Slider(
                    minimum=0.05, maximum=0.5, value=0.15, step=0.01,
                    label="Height (% of image)",
                )

                preview_btn = gr.Button("Preview Rectangle")
                extract_manual_btn = gr.Button("Extract Rectangle")

        # Result images.
        with gr.Column():
            enhanced_output = gr.Image(label="Enhanced Image")
            line_output = gr.Image(label="Extracted Text")
            preview_output = gr.Image(label="Rectangle Preview")

    # Images returned by debug_voynich_detection, in order.
    with gr.Row():
        debug_search = gr.Image(label="1. Search Area")
        debug_enhanced = gr.Image(label="2. Enhanced")
        debug_thresh = gr.Image(label="3. Threshold")
        debug_result = gr.Image(label="4. Result")

    # Auto extract button handlers.
    enhance_btn.click(fn=preprocess_voynich_image, inputs=input_image, outputs=enhanced_output)
    extract_btn.click(fn=find_text_lines_voynich, inputs=input_image, outputs=line_output)
    block_btn.click(
        fn=extract_text_block,
        inputs=[input_image, start_slider, height_slider],
        outputs=line_output,
    )
    debug_btn.click(
        fn=debug_voynich_detection,
        inputs=input_image,
        outputs=[debug_search, debug_enhanced, debug_thresh, debug_result],
    )

    # Manual rectangle handlers: preview and extraction take the same
    # image + four-slider input set.
    _rect_inputs = [input_image, x_start_slider, y_start_slider, width_slider, height_slider_manual]
    preview_btn.click(fn=show_rectangle_preview, inputs=_rect_inputs, outputs=preview_output)
    extract_manual_btn.click(fn=manual_extract_rectangle, inputs=_rect_inputs, outputs=line_output)

    # Refresh the preview whenever any rectangle slider changes.
    for slider in [x_start_slider, y_start_slider, width_slider, height_slider_manual]:
        slider.change(fn=show_rectangle_preview, inputs=_rect_inputs, outputs=preview_output)
# Start the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()