"""Voynich manuscript text-line extraction tool.

Detects and crops single lines of manuscript text from a folio image
using OpenCV (CLAHE contrast enhancement, adaptive thresholding, and
contour filtering), with a line-by-line intensity scan and manual
rectangle selection as fallbacks. A Gradio UI wires everything together.
"""

import gradio as gr
import cv2
import numpy as np
from PIL import Image

# Fraction of the page height skipped at the top before searching for text,
# so page edges and headers are not mistaken for text lines. Shared by the
# detector and the debug visualization so they always agree.
SKIP_TOP_FRACTION = 0.15


def find_text_lines_voynich(img_pil):
    """Find and crop the topmost single text line in a Voynich folio.

    Args:
        img_pil: PIL image of the full folio (RGB or grayscale), or None.

    Returns:
        PIL image of the cropped text line, or None when img_pil is None.
        Falls back to scan_for_text_lines() when contour detection finds
        no usable line.
    """
    if img_pil is None:
        return None

    # Convert to OpenCV format (grayscale for analysis, keep `img` for cropping).
    img = np.array(img_pil)
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img

    img_height, img_width = gray.shape
    print(f"Processing image: {img_width}x{img_height}")

    # Skip the top portion where page edges and headers might be;
    # look for text in the middle and lower portions.
    skip_top = int(img_height * SKIP_TOP_FRACTION)
    search_area = gray[skip_top:, :]
    print(f"Searching in area starting from y={skip_top}")

    # Enhance contrast specifically for faded manuscript text.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(search_area)

    # Adaptive thresholding handles the uneven lighting of manuscript
    # pages better than a single global threshold.
    thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)

    # Small horizontal kernel: connect characters within words while
    # avoiding merging adjacent lines.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 1))
    connected = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

    contours, _ = cv2.findContours(connected, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    print(f"Found {len(contours)} contours in search area")

    # Filter for text-like contours: wide, short, strongly horizontal regions.
    text_contours = []
    search_height, search_width = search_area.shape
    for i, contour in enumerate(contours):
        x, y, w, h = cv2.boundingRect(contour)

        aspect_ratio = w / h if h > 0 else 0
        width_percent = (w / search_width) * 100
        height_percent = (h / search_height) * 100

        print(f"Contour {i}: pos=({x},{y}), size=({w},{h}), ratio={aspect_ratio:.1f}, w%={width_percent:.1f}, h%={height_percent:.1f}")

        # Restrictive criteria tuned for single text lines.
        if (w >= search_width * 0.15 and       # minimum width
                h >= 8 and                     # minimum height
                h <= search_height * 0.03 and  # small maximum height
                aspect_ratio >= 5.0 and        # strongly horizontal
                width_percent <= 85 and        # tighter width limit
                height_percent <= 3.0):        # height percentage limit
            # Store y in full-image coordinates (add skip_top back).
            text_contours.append((contour, x, y + skip_top, w, h))
            print(f" ✓ ACCEPTED as text line")
        else:
            print(f" ✗ REJECTED")

    print(f"Found {len(text_contours)} potential text lines")

    if text_contours:
        # Sort by y position so the topmost text line comes first.
        text_contours.sort(key=lambda entry: entry[2])
        contour, x, y, w, h = text_contours[0]
        print(f"Extracting text line at: x={x}, y={y}, w={w}, h={h}")

        # Small margins give a tight crop around the line.
        margin_x = 15
        margin_y = 10
        y_start = max(0, y - margin_y)
        y_end = min(img_height, y + h + margin_y)
        x_start = max(0, x - margin_x)
        x_end = min(img_width, x + w + margin_x)

        extracted = img[y_start:y_end, x_start:x_end]
        if extracted.size > 0:
            print(f"Successfully extracted line: {extracted.shape}")
            return Image.fromarray(extracted)

    # Fallback: contour detection produced no usable line (none found,
    # all rejected, or an empty crop) — scan line by line instead.
    print("No usable text-line contour, trying line-by-line scan...")
    return scan_for_text_lines(img, skip_top)


def scan_for_text_lines(img, start_y):
    """Scan the image row-band by row-band looking for a single text line.

    Args:
        img: numpy image array (RGB or grayscale) of the full folio.
        start_y: y coordinate to start scanning from (skips the header area).

    Returns:
        PIL image of the first band whose ink distribution looks like a
        single text line; as a last resort, a thin middle section of the page.
    """
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img

    img_height, img_width = gray.shape

    # Scan downward in 25-pixel strips, stepping 5 pixels at a time.
    for y in range(start_y, img_height - 25, 5):
        strip = gray[y:y + 25, :]

        # Otsu picks the ink/background threshold for this strip.
        _, thresh = cv2.threshold(strip, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

        # Fraction of the strip covered by ink.
        ink_pixels = np.sum(thresh > 0)
        total_pixels = strip.shape[0] * strip.shape[1]
        ink_ratio = ink_pixels / total_pixels

        # Check that the ink is spread across several rows (like text),
        # not concentrated in one blob.
        row_sums = np.sum(thresh, axis=1)
        rows_with_ink = np.sum(row_sums > 0)

        print(f"y={y}: ink_ratio={ink_ratio:.3f}, rows_with_ink={rows_with_ink}")

        # Restrictive criteria for a single line: modest ink coverage
        # spread over a text-like number of rows.
        if ink_ratio > 0.02 and ink_ratio < 0.15 and rows_with_ink >= 3 and rows_with_ink <= 15:
            # Expand the region slightly but keep the crop small.
            y_start = max(0, y - 8)
            y_end = min(img_height, y + 33)

            if len(img.shape) == 3:
                extracted = img[y_start:y_end, :]
            else:
                extracted = gray[y_start:y_end, :]

            print(f"Found text at y={y}, extracting region {y_start}:{y_end}")
            return Image.fromarray(extracted)

    # Last resort: return a thin section from the middle of the page.
    print("No text found, returning smaller middle section")
    mid_y = img_height // 2
    section = img[mid_y:mid_y + img_height // 8, :]
    return Image.fromarray(section)


def preprocess_voynich_image(img_pil):
    """Enhance contrast of a Voynich folio for easier reading.

    Applies CLAHE to the lightness channel in LAB color space so color
    balance is preserved while faded ink becomes more visible.

    Args:
        img_pil: PIL RGB image, or None.

    Returns:
        Contrast-enhanced PIL image, or None when img_pil is None.
    """
    if img_pil is None:
        return None

    img = np.array(img_pil)

    # Work on the L (lightness) channel only so colors are unaffected.
    lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab)

    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    l = clahe.apply(l)

    enhanced = cv2.merge([l, a, b])
    enhanced = cv2.cvtColor(enhanced, cv2.COLOR_LAB2RGB)

    return Image.fromarray(enhanced)


def debug_voynich_detection(img_pil):
    """Visualize each stage of the text-line detection pipeline.

    Uses the same SKIP_TOP_FRACTION as find_text_lines_voynich so the
    visualization matches what the detector actually searches.

    Args:
        img_pil: PIL image of the full folio, or None.

    Returns:
        Tuple of four PIL images (search area overlay, CLAHE-enhanced
        search area, full-size threshold map, final extraction result),
        or (None, None, None, None) when img_pil is None.
    """
    if img_pil is None:
        return None, None, None, None

    img = np.array(img_pil)
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img

    img_height, img_width = gray.shape

    # Same skip as the detector, so the overlay shows the real search area.
    skip_top = int(img_height * SKIP_TOP_FRACTION)
    search_area = gray[skip_top:, :]

    # Draw the search-area rectangle on a copy of the grayscale image.
    search_viz = np.copy(gray)
    cv2.rectangle(search_viz, (0, skip_top), (img_width, img_height), (128), 2)

    # Same CLAHE + adaptive threshold settings as the detector.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(search_area)

    thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)

    # Embed the threshold result in a full-size canvas for display.
    thresh_full = np.zeros_like(gray)
    thresh_full[skip_top:, :] = thresh

    # Run the actual detector for the final panel.
    result = find_text_lines_voynich(img_pil)

    return (Image.fromarray(search_viz),
            Image.fromarray(enhanced),
            Image.fromarray(thresh_full),
            result)


def extract_text_block(img_pil, start_percent=0.2, height_percent=0.4):
    """Extract a full-width horizontal block from a specific region.

    Args:
        img_pil: PIL image, or None.
        start_percent: top of the block as a fraction of image height.
        height_percent: block height as a fraction of image height.

    Returns:
        PIL image of the block, or None when img_pil is None.
    """
    if img_pil is None:
        return None

    img = np.array(img_pil)
    img_height = img.shape[0]

    start_y = int(img_height * start_percent)
    block_height = int(img_height * height_percent)
    end_y = min(img_height, start_y + block_height)

    block = img[start_y:end_y, :]
    return Image.fromarray(block)


def manual_extract_rectangle(img_pil, x_start_percent=0.0, y_start_percent=0.2,
                             width_percent=1.0, height_percent=0.15):
    """Extract a rectangle chosen by the user via percentage sliders.

    Args:
        img_pil: PIL image, or None.
        x_start_percent, y_start_percent: top-left corner as fractions of
            image width/height.
        width_percent, height_percent: rectangle size as fractions of
            image width/height.

    Returns:
        PIL image of the rectangle, or None when img_pil is None or the
        selection is empty.
    """
    if img_pil is None:
        return None

    img = np.array(img_pil)
    img_height, img_width = img.shape[:2]

    # Convert percentages to pixel coordinates.
    x_start = int(img_width * x_start_percent)
    y_start = int(img_height * y_start_percent)
    width = int(img_width * width_percent)
    height = int(img_height * height_percent)

    # Clamp everything to the image bounds.
    x_start = max(0, min(x_start, img_width - 1))
    y_start = max(0, min(y_start, img_height - 1))
    x_end = min(img_width, x_start + width)
    y_end = min(img_height, y_start + height)

    rectangle = img[y_start:y_end, x_start:x_end]
    print(f"Manual extract: x={x_start}:{x_end}, y={y_start}:{y_end}, size={rectangle.shape}")

    if rectangle.size > 0:
        return Image.fromarray(rectangle)
    else:
        return None


def show_rectangle_preview(img_pil, x_start_percent=0.0, y_start_percent=0.2,
                           width_percent=1.0, height_percent=0.15):
    """Draw a preview of the rectangle that manual extraction would crop.

    Args:
        img_pil: PIL image, or None.
        x_start_percent, y_start_percent, width_percent, height_percent:
            same meaning as in manual_extract_rectangle().

    Returns:
        PIL image with the selection outlined in red and shaded green,
        or None when img_pil is None.
    """
    if img_pil is None:
        return None

    img = np.array(img_pil)
    img_height, img_width = img.shape[:2]

    # Convert percentages to pixel coordinates.
    x_start = int(img_width * x_start_percent)
    y_start = int(img_height * y_start_percent)
    width = int(img_width * width_percent)
    height = int(img_height * height_percent)

    # Clamp everything to the image bounds.
    x_start = max(0, min(x_start, img_width - 1))
    y_start = max(0, min(y_start, img_height - 1))
    x_end = min(img_width, x_start + width)
    y_end = min(img_height, y_start + height)

    # Draw on a copy so the input image is untouched.
    preview = np.copy(img)
    cv2.rectangle(preview, (x_start, y_start), (x_end, y_end), (255, 0, 0), 2)

    # Semi-transparent green overlay highlights the selected area.
    overlay = np.copy(preview)
    cv2.rectangle(overlay, (x_start, y_start), (x_end, y_end), (0, 255, 0), -1)
    preview = cv2.addWeighted(preview, 0.8, overlay, 0.2, 0)

    return Image.fromarray(preview)


# Gradio interface: automatic detection, block extraction, debugging,
# and a manual rectangle-selection fallback.
with gr.Blocks(title="Voynich Text Line Extractor - Single Line Focus") as demo:
    gr.Markdown("# Voynich Text Line Extractor - Single Line Focus")
    gr.Markdown("This version is optimized to extract single text lines with tighter bounding boxes.")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload Voynich Folio")

            with gr.Tab("Auto Extract"):
                enhance_btn = gr.Button("Enhance Image")
                extract_btn = gr.Button("Find Text Lines")
                block_btn = gr.Button("Extract Text Block")
                debug_btn = gr.Button("Debug Detection")

                # Sliders controlling text-block extraction.
                start_slider = gr.Slider(0.1, 0.8, 0.2, label="Start Position (% from top)")
                height_slider = gr.Slider(0.1, 0.6, 0.4, label="Block Height (% of image)")

            with gr.Tab("Manual Rectangle"):
                gr.Markdown("### Manual Rectangle Selection")
                gr.Markdown("Adjust the sliders to manually select a rectangular region")

                x_start_slider = gr.Slider(0.0, 0.9, 0.0, step=0.01, label="X Start (% from left)")
                y_start_slider = gr.Slider(0.0, 0.9, 0.2, step=0.01, label="Y Start (% from top)")
                width_slider = gr.Slider(0.1, 1.0, 1.0, step=0.01, label="Width (% of image)")
                height_slider_manual = gr.Slider(0.05, 0.5, 0.15, step=0.01, label="Height (% of image)")

                preview_btn = gr.Button("Preview Rectangle")
                extract_manual_btn = gr.Button("Extract Rectangle")

        with gr.Column():
            enhanced_output = gr.Image(label="Enhanced Image")
            line_output = gr.Image(label="Extracted Text")
            preview_output = gr.Image(label="Rectangle Preview")

    with gr.Row():
        debug_search = gr.Image(label="1. Search Area")
        debug_enhanced = gr.Image(label="2. Enhanced")
        debug_thresh = gr.Image(label="3. Threshold")
        debug_result = gr.Image(label="4. Result")

    # Auto extract button handlers
    enhance_btn.click(
        fn=preprocess_voynich_image,
        inputs=input_image,
        outputs=enhanced_output
    )

    extract_btn.click(
        fn=find_text_lines_voynich,
        inputs=input_image,
        outputs=line_output
    )

    block_btn.click(
        fn=extract_text_block,
        inputs=[input_image, start_slider, height_slider],
        outputs=line_output
    )

    debug_btn.click(
        fn=debug_voynich_detection,
        inputs=input_image,
        outputs=[debug_search, debug_enhanced, debug_thresh, debug_result]
    )

    # Manual rectangle handlers
    preview_btn.click(
        fn=show_rectangle_preview,
        inputs=[input_image, x_start_slider, y_start_slider, width_slider, height_slider_manual],
        outputs=preview_output
    )

    extract_manual_btn.click(
        fn=manual_extract_rectangle,
        inputs=[input_image, x_start_slider, y_start_slider, width_slider, height_slider_manual],
        outputs=line_output
    )

    # Auto-update the preview whenever any rectangle slider changes.
    for slider in [x_start_slider, y_start_slider, width_slider, height_slider_manual]:
        slider.change(
            fn=show_rectangle_preview,
            inputs=[input_image, x_start_slider, y_start_slider, width_slider, height_slider_manual],
            outputs=preview_output
        )


if __name__ == "__main__":
    demo.launch()