# PrimeVlines / app.py
# Voynich manuscript text-line extraction demo (Gradio app).
import gradio as gr
import cv2
import numpy as np
from PIL import Image
def find_text_lines_voynich(img_pil):
    """Locate and crop the topmost Voynich text line from a folio image.

    Skips the top 15% of the page (where page edges and headers live),
    enhances contrast with CLAHE, applies adaptive thresholding, joins
    characters with a small horizontal closing kernel, then filters the
    resulting contours for wide, short, line-like shapes.  Falls back to
    a row-by-row ink scan when no contour qualifies.

    Args:
        img_pil: PIL.Image of a folio (RGB or grayscale), or None.

    Returns:
        PIL.Image crop of the first detected text line, or None when the
        input is None.
    """
    if img_pil is None:
        return None
    # Convert to OpenCV format; analysis runs on a grayscale copy
    img = np.array(img_pil)
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img
    img_height, img_width = gray.shape
    print(f"Processing image: {img_width}x{img_height}")
    # Skip the top portion where page edges and headers might be;
    # look for text in the middle and lower portions only.
    skip_top = int(img_height * 0.15)  # Skip top 15%
    search_area = gray[skip_top:, :]
    print(f"Searching in area starting from y={skip_top}")
    # Enhance contrast specifically for faded manuscript text
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(search_area)
    # Adaptive thresholding copes with the uneven parchment illumination
    thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)
    # Small horizontal kernel: connects characters within a word/line
    # without merging vertically-adjacent lines.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 1))
    connected = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    # Find contours of the connected ink regions
    contours, _ = cv2.findContours(connected, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    print(f"Found {len(contours)} contours in search area")
    # Filter for text-line-like contours
    text_contours = []
    search_height, search_width = search_area.shape
    for i, contour in enumerate(contours):
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h if h > 0 else 0
        width_percent = (w / search_width) * 100
        height_percent = (h / search_height) * 100
        print(f"Contour {i}: pos=({x},{y}), size=({w},{h}), ratio={aspect_ratio:.1f}, w%={width_percent:.1f}, h%={height_percent:.1f}")
        # Restrictive criteria: only wide, short, strongly horizontal
        # regions qualify as single text lines.
        if (w >= search_width * 0.15 and      # minimum width
            h >= 8 and                        # minimum height
            h <= search_height * 0.03 and     # small maximum height
            aspect_ratio >= 5.0 and           # strongly horizontal
            width_percent <= 85 and           # reject full-page edges
            height_percent <= 3.0):           # extra height cap
            # Store y in full-image coordinates (add skip_top back)
            text_contours.append((contour, x, y + skip_top, w, h))
            print(f"  ✓ ACCEPTED as text line")
        else:
            print(f"  ✗ REJECTED")
    print(f"Found {len(text_contours)} potential text lines")
    if text_contours:
        # Sort by y-coordinate so the topmost text line comes first
        text_contours.sort(key=lambda item: item[2])
        contour, x, y, w, h = text_contours[0]
        print(f"Extracting text line at: x={x}, y={y}, w={w}, h={h}")
        # Small margins for a tight crop, clamped to the image bounds
        margin_x = 15
        margin_y = 10
        y_start = max(0, y - margin_y)
        y_end = min(img_height, y + h + margin_y)
        x_start = max(0, x - margin_x)
        x_end = min(img_width, x + w + margin_x)
        extracted = img[y_start:y_end, x_start:x_end]
        if extracted.size > 0:
            print(f"Successfully extracted line: {extracted.shape}")
            return Image.fromarray(extracted)
    # Fallback: reached when no contour passed the filters (or the crop
    # came out empty), not only when zero contours were found — the old
    # "No contours found" message was misleading.
    print("No qualifying text line found, trying line-by-line scan...")
    return scan_for_text_lines(img, skip_top)
def scan_for_text_lines(img, start_y):
    """Fallback detector: walk down the page in thin strips looking for ink.

    Examines a 25-px-tall horizontal strip every 5 px starting at
    start_y, and returns the first strip whose ink density and row
    distribution resemble a single text line.  When nothing matches,
    returns a small section from the middle of the page.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) if len(img.shape) == 3 else img
    img_height, img_width = gray.shape
    # Slide a thin strip down the page from start_y
    for y in range(start_y, img_height - 25, 5):
        strip = gray[y:y + 25, :]
        # Otsu threshold isolates the dark ink pixels in this strip
        _, thresh = cv2.threshold(strip, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        total_pixels = strip.shape[0] * strip.shape[1]
        ink_ratio = np.sum(thresh > 0) / total_pixels
        # How many of the strip's rows actually carry ink (text spreads
        # horizontally, so several consecutive rows should have some)
        rows_with_ink = np.sum(np.sum(thresh, axis=1) > 0)
        print(f"y={y}: ink_ratio={ink_ratio:.3f}, rows_with_ink={rows_with_ink}")
        # Single-line heuristics: some ink but not too much, spread over
        # a plausible number of rows
        if 0.02 < ink_ratio < 0.15 and 3 <= rows_with_ink <= 15:
            # Expand the region slightly while keeping the crop small
            y_start = max(0, y - 8)
            y_end = min(img_height, y + 33)
            source = img if len(img.shape) == 3 else gray
            extracted = source[y_start:y_end, :]
            print(f"Found text at y={y}, extracting region {y_start}:{y_end}")
            return Image.fromarray(extracted)
    # Nothing matched anywhere: hand back a 1/8-height middle section
    print("No text found, returning smaller middle section")
    mid_y = img_height // 2
    return Image.fromarray(img[mid_y:mid_y + img_height // 8, :])
def preprocess_voynich_image(img_pil):
    """Boost local contrast of a folio image for easier text inspection.

    Color images are processed in LAB space (CLAHE on the lightness
    channel only, so hues are preserved); grayscale images get CLAHE
    applied directly.

    Args:
        img_pil: PIL.Image (RGB or grayscale), or None.

    Returns:
        Contrast-enhanced PIL.Image, or None when the input is None.
    """
    if img_pil is None:
        return None
    img = np.array(img_pil)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    # Grayscale input: nothing color-related to preserve, enhance directly.
    # (The original assumed 3-channel input and crashed on grayscale.)
    if len(img.shape) != 3:
        return Image.fromarray(clahe.apply(img))
    # Color input: enhance the L (lightness) channel in LAB space so the
    # contrast boost does not distort colors.
    lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab)
    l = clahe.apply(l)
    enhanced = cv2.merge([l, a, b])
    enhanced = cv2.cvtColor(enhanced, cv2.COLOR_LAB2RGB)
    return Image.fromarray(enhanced)
def debug_voynich_detection(img_pil):
    """Visualize each stage of the text-line detection pipeline.

    Mirrors the preprocessing of find_text_lines_voynich (same 15% top
    skip, CLAHE and adaptive-threshold settings) so the panels show what
    the detector actually sees.

    Args:
        img_pil: PIL.Image of a folio, or None.

    Returns:
        Tuple of four PIL.Images: (search-area outline, enhanced search
        area, full-size threshold, final extraction result), or
        (None, None, None, None) when the input is None.
    """
    if img_pil is None:
        return None, None, None, None
    img = np.array(img_pil)
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img
    img_height, img_width = gray.shape
    # Match the detector: skip the top 15% of the page.  (This was 5%,
    # which made the debug panels show a different search area than the
    # one find_text_lines_voynich really uses.)
    skip_top = int(img_height * 0.15)
    search_area = gray[skip_top:, :]
    # Panel 1: outline the search area on a copy of the original page
    search_viz = np.copy(gray)
    cv2.rectangle(search_viz, (0, skip_top), (img_width, img_height), (128), 2)
    # Panel 2: CLAHE-enhanced search area (same settings as the detector)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(search_area)
    # Panel 3: adaptive threshold, pasted back at full-page coordinates
    thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)
    thresh_full = np.zeros_like(gray)
    thresh_full[skip_top:, :] = thresh
    # Panel 4: the actual extraction result
    result = find_text_lines_voynich(img_pil)
    return (Image.fromarray(search_viz),
            Image.fromarray(enhanced),
            Image.fromarray(thresh_full),
            result)
def extract_text_block(img_pil, start_percent=0.2, height_percent=0.4):
    """Crop a horizontal band of the image by fractional position and size.

    Args:
        img_pil: source PIL.Image, or None.
        start_percent: top of the band as a fraction of image height.
        height_percent: band height as a fraction of image height.

    Returns:
        The cropped PIL.Image band, or None when the input is None.
    """
    if img_pil is None:
        return None
    pixels = np.array(img_pil)
    total_height = pixels.shape[0]
    # Fractions -> pixel rows, clamping the bottom edge to the image
    top = int(total_height * start_percent)
    bottom = min(total_height, top + int(total_height * height_percent))
    return Image.fromarray(pixels[top:bottom, :])
def manual_extract_rectangle(img_pil, x_start_percent=0.0, y_start_percent=0.2,
                             width_percent=1.0, height_percent=0.15):
    """Crop a user-specified rectangle given as fractions of image size.

    Args:
        img_pil: source PIL.Image, or None.
        x_start_percent, y_start_percent: top-left corner as fractions.
        width_percent, height_percent: rectangle size as fractions.

    Returns:
        The cropped PIL.Image, or None when the input is None or the
        resulting crop is empty.
    """
    if img_pil is None:
        return None
    img = np.array(img_pil)
    img_height, img_width = img.shape[:2]
    # Fractions -> pixels, clamped so the rectangle stays on the page
    x_start = min(max(0, int(img_width * x_start_percent)), img_width - 1)
    y_start = min(max(0, int(img_height * y_start_percent)), img_height - 1)
    x_end = min(img_width, x_start + int(img_width * width_percent))
    y_end = min(img_height, y_start + int(img_height * height_percent))
    rectangle = img[y_start:y_end, x_start:x_end]
    print(f"Manual extract: x={x_start}:{x_end}, y={y_start}:{y_end}, size={rectangle.shape}")
    return Image.fromarray(rectangle) if rectangle.size > 0 else None
def show_rectangle_preview(img_pil, x_start_percent=0.0, y_start_percent=0.2,
                           width_percent=1.0, height_percent=0.15):
    """Render the selection rectangle on a copy of the image.

    Draws a red outline plus a faint green fill over the region that
    manual_extract_rectangle would crop with the same parameters.

    Args:
        img_pil: source PIL.Image, or None.
        x_start_percent, y_start_percent: top-left corner as fractions.
        width_percent, height_percent: rectangle size as fractions.

    Returns:
        Annotated PIL.Image preview, or None when the input is None.
    """
    if img_pil is None:
        return None
    img = np.array(img_pil)
    img_height, img_width = img.shape[:2]
    # Same fraction -> pixel conversion and clamping as the extractor
    x_start = min(max(0, int(img_width * x_start_percent)), img_width - 1)
    y_start = min(max(0, int(img_height * y_start_percent)), img_height - 1)
    x_end = min(img_width, x_start + int(img_width * width_percent))
    y_end = min(img_height, y_start + int(img_height * height_percent))
    preview = np.copy(img)
    # Red outline marks the selection boundary
    cv2.rectangle(preview, (x_start, y_start), (x_end, y_end), (255, 0, 0), 2)
    # Blend in a translucent green fill so the selected area stands out
    overlay = np.copy(preview)
    cv2.rectangle(overlay, (x_start, y_start), (x_end, y_end), (0, 255, 0), -1)
    preview = cv2.addWeighted(preview, 0.8, overlay, 0.2, 0)
    return Image.fromarray(preview)
# Enhanced Gradio interface: two workflows (automatic detection and a
# manual rectangle selector) share one upload component and one output
# column.  NOTE(review): component creation order determines layout, so
# do not reorder the constructors below.
with gr.Blocks(title="Voynich Text Line Extractor - Single Line Focus") as demo:
    gr.Markdown("# Voynich Text Line Extractor - Single Line Focus")
    gr.Markdown("This version is optimized to extract single text lines with tighter bounding boxes.")
    with gr.Row():
        with gr.Column():
            # Shared input image used by every handler below
            input_image = gr.Image(type="pil", label="Upload Voynich Folio")
            with gr.Tab("Auto Extract"):
                enhance_btn = gr.Button("Enhance Image")
                extract_btn = gr.Button("Find Text Lines")
                block_btn = gr.Button("Extract Text Block")
                debug_btn = gr.Button("Debug Detection")
                # Sliders controlling extract_text_block's region
                start_slider = gr.Slider(0.1, 0.8, 0.2, label="Start Position (% from top)")
                height_slider = gr.Slider(0.1, 0.6, 0.4, label="Block Height (% of image)")
            with gr.Tab("Manual Rectangle"):
                gr.Markdown("### Manual Rectangle Selection")
                gr.Markdown("Adjust the sliders to manually select a rectangular region")
                # Rectangle geometry as fractions of the image size;
                # height_slider_manual is intentionally distinct from the
                # Auto Extract tab's height_slider.
                x_start_slider = gr.Slider(0.0, 0.9, 0.0, step=0.01, label="X Start (% from left)")
                y_start_slider = gr.Slider(0.0, 0.9, 0.2, step=0.01, label="Y Start (% from top)")
                width_slider = gr.Slider(0.1, 1.0, 1.0, step=0.01, label="Width (% of image)")
                height_slider_manual = gr.Slider(0.05, 0.5, 0.15, step=0.01, label="Height (% of image)")
                preview_btn = gr.Button("Preview Rectangle")
                extract_manual_btn = gr.Button("Extract Rectangle")
        with gr.Column():
            # line_output is shared by auto extract, block extract, and
            # manual extract handlers
            enhanced_output = gr.Image(label="Enhanced Image")
            line_output = gr.Image(label="Extracted Text")
            preview_output = gr.Image(label="Rectangle Preview")
    with gr.Row():
        # Four-panel view produced by debug_voynich_detection
        debug_search = gr.Image(label="1. Search Area")
        debug_enhanced = gr.Image(label="2. Enhanced")
        debug_thresh = gr.Image(label="3. Threshold")
        debug_result = gr.Image(label="4. Result")
    # Auto extract button handlers
    enhance_btn.click(
        fn=preprocess_voynich_image,
        inputs=input_image,
        outputs=enhanced_output
    )
    extract_btn.click(
        fn=find_text_lines_voynich,
        inputs=input_image,
        outputs=line_output
    )
    block_btn.click(
        fn=extract_text_block,
        inputs=[input_image, start_slider, height_slider],
        outputs=line_output
    )
    debug_btn.click(
        fn=debug_voynich_detection,
        inputs=input_image,
        outputs=[debug_search, debug_enhanced, debug_thresh, debug_result]
    )
    # Manual rectangle handlers
    preview_btn.click(
        fn=show_rectangle_preview,
        inputs=[input_image, x_start_slider, y_start_slider, width_slider, height_slider_manual],
        outputs=preview_output
    )
    extract_manual_btn.click(
        fn=manual_extract_rectangle,
        inputs=[input_image, x_start_slider, y_start_slider, width_slider, height_slider_manual],
        outputs=line_output
    )
    # Auto-update the preview whenever any rectangle slider changes
    for slider in [x_start_slider, y_start_slider, width_slider, height_slider_manual]:
        slider.change(
            fn=show_rectangle_preview,
            inputs=[input_image, x_start_slider, y_start_slider, width_slider, height_slider_manual],
            outputs=preview_output
        )
if __name__ == "__main__":
    demo.launch()