# Source captured from a Hugging Face Space (runtime status at capture time: Sleeping).
| import gradio as gr | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image | |
def find_text_lines_voynich(img_pil):
    """Find and extract the topmost single text line in a Voynich folio image.

    Skips the top 15% of the page (page edges / headers), enhances contrast
    with CLAHE, applies adaptive thresholding, and filters contours for wide,
    short, line-like shapes.  Falls back to a strip-scanning heuristic when no
    contour passes the filter or the crop comes out empty.

    Args:
        img_pil: input PIL image (RGB or grayscale), or None.

    Returns:
        A PIL image containing the extracted line, or None when img_pil is None.
    """
    if img_pil is None:
        return None

    # Convert to an OpenCV-friendly array; collapse to grayscale if needed.
    img = np.array(img_pil)
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img
    img_height, img_width = gray.shape
    print(f"Processing image: {img_width}x{img_height}")

    # Restrict the search to the lower 85% of the page: the top band tends to
    # contain page edges and headers rather than body text.
    skip_top = int(img_height * 0.15)
    search_area = gray[skip_top:, :]
    print(f"Searching in area starting from y={skip_top}")

    # CLAHE boosts local contrast, which helps with faded manuscript ink.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(search_area)

    # Adaptive thresholding copes better with uneven parchment illumination
    # than a single global threshold.
    thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)

    # Small horizontal closing kernel: joins characters within a word while
    # staying short enough not to merge adjacent lines vertically.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 1))
    connected = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

    contours, _ = cv2.findContours(connected, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    print(f"Found {len(contours)} contours in search area")

    # Filter for text-like contours.
    text_contours = []
    search_height, search_width = search_area.shape
    for i, contour in enumerate(contours):
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h if h > 0 else 0
        width_percent = (w / search_width) * 100
        height_percent = (h / search_height) * 100
        print(f"Contour {i}: pos=({x},{y}), size=({w},{h}), ratio={aspect_ratio:.1f}, w%={width_percent:.1f}, h%={height_percent:.1f}")
        # Restrictive single-line criteria: wide, short, strongly horizontal.
        # NOTE: the original also tested `h <= search_height * 0.03`, which is
        # the same condition as `height_percent <= 3.0`; only one form is kept.
        if (w >= search_width * 0.15 and    # minimum width
                h >= 8 and                  # minimum height in pixels
                height_percent <= 3.0 and   # maximum height: 3% of search area
                aspect_ratio >= 5.0 and     # strongly horizontal shape
                width_percent <= 85):       # reject near-full-width page edges
            # Store y in full-image coordinates (search area starts at skip_top).
            text_contours.append((contour, x, y + skip_top, w, h))
            print(f" ✓ ACCEPTED as text line")
        else:
            print(f" ✗ REJECTED")

    print(f"Found {len(text_contours)} potential text lines")

    if text_contours:
        # Sort by y so the topmost accepted line comes first.
        text_contours.sort(key=lambda item: item[2])
        contour, x, y, w, h = text_contours[0]
        print(f"Extracting text line at: x={x}, y={y}, w={w}, h={h}")
        # Tight crop with small margins, clamped to the image bounds.
        margin_x = 15
        margin_y = 10
        y_start = max(0, y - margin_y)
        y_end = min(img_height, y + h + margin_y)
        x_start = max(0, x - margin_x)
        x_end = min(img_width, x + w + margin_x)
        extracted = img[y_start:y_end, x_start:x_end]
        if extracted.size > 0:
            print(f"Successfully extracted line: {extracted.shape}")
            return Image.fromarray(extracted)

    # Fallback: nothing passed the filter (or the crop was empty).  The old
    # message claimed "No contours found" even when contours existed but were
    # all rejected — corrected to describe what actually happened.
    print("No text-line contours accepted, trying line-by-line scan...")
    return scan_for_text_lines(img, skip_top)
def scan_for_text_lines(img, start_y):
    """Fallback detector: scan horizontal strips below start_y for a text line.

    Slides a 25-pixel-high window down the page in 5-pixel steps, Otsu-
    thresholds each strip, and accepts the first strip whose ink density and
    vertical ink spread look like a single line of writing.

    Args:
        img: numpy array of the full image (RGB or grayscale).
        start_y: row at which to begin scanning.

    Returns:
        A PIL image of the found line, or a thin middle band of the page when
        no strip matches.
    """
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img
    img_height, img_width = gray.shape

    # Slide a 25px-high strip downward, checking every 5 pixels.
    for y in range(start_y, img_height - 25, 5):
        strip = gray[y:y + 25, :]
        # Otsu picks a per-strip ink threshold automatically.
        _, thresh = cv2.threshold(strip, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        # Overall ink density of the strip.
        ink_pixels = np.sum(thresh > 0)
        total_pixels = strip.shape[0] * strip.shape[1]
        ink_ratio = ink_pixels / total_pixels
        # Text shows up as ink spread over a handful of consecutive rows,
        # so also count how many rows contain any ink at all.
        row_sums = np.sum(thresh, axis=1)
        rows_with_ink = np.sum(row_sums > 0)
        print(f"y={y}: ink_ratio={ink_ratio:.3f}, rows_with_ink={rows_with_ink}")
        # Single-line criteria: some ink, but not a dense block of it.
        if 0.02 < ink_ratio < 0.15 and 3 <= rows_with_ink <= 15:
            # Expand the region slightly while keeping the crop small.
            y_start = max(0, y - 8)
            y_end = min(img_height, y + 33)
            if len(img.shape) == 3:
                extracted = img[y_start:y_end, :]
            else:
                extracted = gray[y_start:y_end, :]
            print(f"Found text at y={y}, extracting region {y_start}:{y_end}")
            return Image.fromarray(extracted)

    # Last resort: return a thin middle band.  Guard against a zero-height
    # slice on very short images, where img_height // 8 would be 0 and
    # Image.fromarray would receive an empty array.
    print("No text found, returning smaller middle section")
    mid_y = img_height // 2
    section_height = max(1, img_height // 8)
    section = img[mid_y:mid_y + section_height, :]
    return Image.fromarray(section)
def preprocess_voynich_image(img_pil):
    """Enhance a folio image by applying CLAHE to the LAB lightness channel.

    Bug fix: cv2.COLOR_RGB2LAB requires a 3-channel image, but a grayscale
    PIL input produces a 2-D array and would raise a cv2.error.  Grayscale
    inputs are now promoted to RGB first, matching how the rest of the file
    tolerates both color and grayscale images.

    Args:
        img_pil: input PIL image (RGB or grayscale), or None.

    Returns:
        An enhanced RGB PIL image, or None when img_pil is None.
    """
    if img_pil is None:
        return None
    img = np.array(img_pil)
    # Promote grayscale to 3 channels so the LAB conversion below is valid.
    if len(img.shape) == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    # Work in LAB so contrast enhancement touches only lightness, not color.
    lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab)
    # CLAHE on the L channel lifts faded ink without blowing out the parchment.
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    l = clahe.apply(l)
    # Recombine and convert back to RGB for display.
    enhanced = cv2.merge([l, a, b])
    enhanced = cv2.cvtColor(enhanced, cv2.COLOR_LAB2RGB)
    return Image.fromarray(enhanced)
def debug_voynich_detection(img_pil):
    """Visualize each stage of the text-line detection pipeline.

    Bug fix: this debug view previously skipped only the top 5% of the page
    while find_text_lines_voynich skips 15%, so the visualized search area,
    enhancement, and threshold did not correspond to the region the detector
    actually examines.  The margin now matches the detector.

    Args:
        img_pil: input PIL image (RGB or grayscale), or None.

    Returns:
        Tuple of four PIL images (search-area overlay, CLAHE-enhanced area,
        full-size threshold map, final extraction result), or four Nones when
        img_pil is None.
    """
    if img_pil is None:
        return None, None, None, None
    img = np.array(img_pil)
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img
    img_height, img_width = gray.shape
    # Same search margin as find_text_lines_voynich (top 15% skipped).
    skip_top = int(img_height * 0.15)
    search_area = gray[skip_top:, :]
    # Stage 1: outline the search area on a copy of the grayscale image.
    search_viz = np.copy(gray)
    cv2.rectangle(search_viz, (0, skip_top), (img_width, img_height), (128), 2)
    # Stage 2: contrast enhancement with the detector's CLAHE parameters.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(search_area)
    # Stage 3: adaptive threshold, placed back onto a full-size canvas so the
    # output aligns with the original image coordinates.
    thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)
    thresh_full = np.zeros_like(gray)
    thresh_full[skip_top:, :] = thresh
    # Stage 4: the actual detection result.
    result = find_text_lines_voynich(img_pil)
    return (Image.fromarray(search_viz),
            Image.fromarray(enhanced),
            Image.fromarray(thresh_full),
            result)
def extract_text_block(img_pil, start_percent=0.2, height_percent=0.4):
    """Crop a horizontal band from the image.

    The band's top edge and height are given as fractions of the total image
    height; the bottom edge is clamped so the crop never runs past the image.

    Args:
        img_pil: input PIL image, or None.
        start_percent: top edge of the band, as a fraction of image height.
        height_percent: band height, as a fraction of image height.

    Returns:
        A PIL image of the band, or None when img_pil is None.
    """
    if img_pil is None:
        return None
    pixels = np.array(img_pil)
    total_rows = pixels.shape[0]
    # Translate fractional coordinates into row indices.
    top = int(total_rows * start_percent)
    bottom = min(total_rows, top + int(total_rows * height_percent))
    return Image.fromarray(pixels[top:bottom, :])
def manual_extract_rectangle(img_pil, x_start_percent=0.0, y_start_percent=0.2,
                             width_percent=1.0, height_percent=0.15):
    """Crop a rectangle specified in fractional (0..1) coordinates.

    Args:
        img_pil: input PIL image, or None.
        x_start_percent / y_start_percent: top-left corner as fractions of
            image width / height.
        width_percent / height_percent: rectangle size as fractions of image
            width / height.

    Returns:
        A PIL image of the cropped rectangle, or None when img_pil is None or
        the resulting crop is empty.
    """
    if img_pil is None:
        return None
    pixels = np.array(img_pil)
    img_height, img_width = pixels.shape[:2]
    # Fractional -> pixel coordinates.
    x_start = int(img_width * x_start_percent)
    y_start = int(img_height * y_start_percent)
    width = int(img_width * width_percent)
    height = int(img_height * height_percent)
    # Clamp the corner inside the image, then clamp the far edges.
    x_start = max(0, min(x_start, img_width - 1))
    y_start = max(0, min(y_start, img_height - 1))
    x_end = min(img_width, x_start + width)
    y_end = min(img_height, y_start + height)
    rectangle = pixels[y_start:y_end, x_start:x_end]
    print(f"Manual extract: x={x_start}:{x_end}, y={y_start}:{y_end}, size={rectangle.shape}")
    # A zero-area selection yields None rather than an empty image.
    return Image.fromarray(rectangle) if rectangle.size > 0 else None
def show_rectangle_preview(img_pil, x_start_percent=0.0, y_start_percent=0.2,
                           width_percent=1.0, height_percent=0.15):
    """Render a preview of the rectangle manual extraction would crop.

    Draws a red outline around the selection plus a 20%-opacity green fill so
    the user can see exactly which region the sliders currently select.

    Args:
        img_pil: input PIL image, or None.
        x_start_percent / y_start_percent: top-left corner as fractions.
        width_percent / height_percent: rectangle size as fractions.

    Returns:
        A PIL image with the annotated preview, or None when img_pil is None.
    """
    if img_pil is None:
        return None
    frame = np.array(img_pil)
    rows, cols = frame.shape[:2]
    # Fractional -> pixel coordinates, corner clamped inside the frame and
    # far edges clamped to the frame borders.
    left = max(0, min(int(cols * x_start_percent), cols - 1))
    top = max(0, min(int(rows * y_start_percent), rows - 1))
    right = min(cols, left + int(cols * width_percent))
    bottom = min(rows, top + int(rows * height_percent))
    # Red outline on a working copy of the image.
    canvas = np.copy(frame)
    cv2.rectangle(canvas, (left, top), (right, bottom), (255, 0, 0), 2)
    # Blend in a solid green rectangle at 20% opacity to shade the selection.
    shaded = np.copy(canvas)
    cv2.rectangle(shaded, (left, top), (right, bottom), (0, 255, 0), -1)
    canvas = cv2.addWeighted(canvas, 0.8, shaded, 0.2, 0)
    return Image.fromarray(canvas)
# Gradio UI: an auto-detection tab plus a manual rectangle-selection tab,
# with debug views for each stage of the detection pipeline.
with gr.Blocks(title="Voynich Text Line Extractor - Single Line Focus") as demo:
    gr.Markdown("# Voynich Text Line Extractor - Single Line Focus")
    gr.Markdown("This version is optimized to extract single text lines with tighter bounding boxes.")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload Voynich Folio")

            with gr.Tab("Auto Extract"):
                enhance_btn = gr.Button("Enhance Image")
                extract_btn = gr.Button("Find Text Lines")
                block_btn = gr.Button("Extract Text Block")
                debug_btn = gr.Button("Debug Detection")
                # Region controls for text-block extraction.
                start_slider = gr.Slider(minimum=0.1, maximum=0.8, value=0.2,
                                         label="Start Position (% from top)")
                height_slider = gr.Slider(minimum=0.1, maximum=0.6, value=0.4,
                                          label="Block Height (% of image)")

            with gr.Tab("Manual Rectangle"):
                gr.Markdown("### Manual Rectangle Selection")
                gr.Markdown("Adjust the sliders to manually select a rectangular region")
                x_start_slider = gr.Slider(minimum=0.0, maximum=0.9, value=0.0, step=0.01,
                                           label="X Start (% from left)")
                y_start_slider = gr.Slider(minimum=0.0, maximum=0.9, value=0.2, step=0.01,
                                           label="Y Start (% from top)")
                width_slider = gr.Slider(minimum=0.1, maximum=1.0, value=1.0, step=0.01,
                                         label="Width (% of image)")
                height_slider_manual = gr.Slider(minimum=0.05, maximum=0.5, value=0.15, step=0.01,
                                                 label="Height (% of image)")
                preview_btn = gr.Button("Preview Rectangle")
                extract_manual_btn = gr.Button("Extract Rectangle")

        with gr.Column():
            enhanced_output = gr.Image(label="Enhanced Image")
            line_output = gr.Image(label="Extracted Text")
            preview_output = gr.Image(label="Rectangle Preview")

    with gr.Row():
        debug_search = gr.Image(label="1. Search Area")
        debug_enhanced = gr.Image(label="2. Enhanced")
        debug_thresh = gr.Image(label="3. Threshold")
        debug_result = gr.Image(label="4. Result")

    # Wire up the auto-extraction actions.
    enhance_btn.click(fn=preprocess_voynich_image,
                      inputs=input_image,
                      outputs=enhanced_output)
    extract_btn.click(fn=find_text_lines_voynich,
                      inputs=input_image,
                      outputs=line_output)
    block_btn.click(fn=extract_text_block,
                    inputs=[input_image, start_slider, height_slider],
                    outputs=line_output)
    debug_btn.click(fn=debug_voynich_detection,
                    inputs=input_image,
                    outputs=[debug_search, debug_enhanced, debug_thresh, debug_result])

    # Wire up the manual-rectangle actions; all share the same input list.
    manual_inputs = [input_image, x_start_slider, y_start_slider,
                     width_slider, height_slider_manual]
    preview_btn.click(fn=show_rectangle_preview,
                      inputs=manual_inputs,
                      outputs=preview_output)
    extract_manual_btn.click(fn=manual_extract_rectangle,
                             inputs=manual_inputs,
                             outputs=line_output)

    # Live preview: refresh whenever any rectangle slider moves.
    for rect_slider in (x_start_slider, y_start_slider, width_slider, height_slider_manual):
        rect_slider.change(fn=show_rectangle_preview,
                           inputs=manual_inputs,
                           outputs=preview_output)

if __name__ == "__main__":
    demo.launch()