# PrimeVlines / app.py
# Voynich manuscript text-line extraction demo (Gradio app).
import gradio as gr
import cv2
import numpy as np
from PIL import Image
def find_text_lines_voynich(img_pil):
    """Locate and crop the topmost Voynich text line from a folio image.

    Skips the top 15% of the page (where page edges and headers live),
    enhances contrast with CLAHE, applies adaptive thresholding, joins
    characters with a small horizontal closing kernel, then filters the
    resulting contours for wide, short, line-like shapes.  Falls back to
    a row-by-row ink scan when no contour qualifies.

    Args:
        img_pil: PIL.Image of a folio (RGB or grayscale), or None.

    Returns:
        PIL.Image crop of the first detected text line, or None when the
        input is None.
    """
    if img_pil is None:
        return None
    # Convert to OpenCV format; analysis runs on a grayscale copy
    img = np.array(img_pil)
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img
    img_height, img_width = gray.shape
    print(f"Processing image: {img_width}x{img_height}")
    # Skip the top portion where page edges and headers might be;
    # look for text in the middle and lower portions only.
    skip_top = int(img_height * 0.15)  # Skip top 15%
    search_area = gray[skip_top:, :]
    print(f"Searching in area starting from y={skip_top}")
    # Enhance contrast specifically for faded manuscript text
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(search_area)
    # Adaptive thresholding copes with the uneven parchment illumination
    thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)
    # Small horizontal kernel: connects characters within a word/line
    # without merging vertically-adjacent lines.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 1))
    connected = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    # Find contours of the connected ink regions
    contours, _ = cv2.findContours(connected, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    print(f"Found {len(contours)} contours in search area")
    # Filter for text-line-like contours
    text_contours = []
    search_height, search_width = search_area.shape
    for i, contour in enumerate(contours):
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h if h > 0 else 0
        width_percent = (w / search_width) * 100
        height_percent = (h / search_height) * 100
        print(f"Contour {i}: pos=({x},{y}), size=({w},{h}), ratio={aspect_ratio:.1f}, w%={width_percent:.1f}, h%={height_percent:.1f}")
        # Restrictive criteria: only wide, short, strongly horizontal
        # regions qualify as single text lines.
        if (w >= search_width * 0.15 and      # minimum width
            h >= 8 and                        # minimum height
            h <= search_height * 0.03 and     # small maximum height
            aspect_ratio >= 5.0 and           # strongly horizontal
            width_percent <= 85 and           # reject full-page edges
            height_percent <= 3.0):           # extra height cap
            # Store y in full-image coordinates (add skip_top back)
            text_contours.append((contour, x, y + skip_top, w, h))
            print(f"  ✓ ACCEPTED as text line")
        else:
            print(f"  ✗ REJECTED")
    print(f"Found {len(text_contours)} potential text lines")
    if text_contours:
        # Sort by y-coordinate so the topmost text line comes first
        text_contours.sort(key=lambda item: item[2])
        contour, x, y, w, h = text_contours[0]
        print(f"Extracting text line at: x={x}, y={y}, w={w}, h={h}")
        # Small margins for a tight crop, clamped to the image bounds
        margin_x = 15
        margin_y = 10
        y_start = max(0, y - margin_y)
        y_end = min(img_height, y + h + margin_y)
        x_start = max(0, x - margin_x)
        x_end = min(img_width, x + w + margin_x)
        extracted = img[y_start:y_end, x_start:x_end]
        if extracted.size > 0:
            print(f"Successfully extracted line: {extracted.shape}")
            return Image.fromarray(extracted)
    # Fallback: reached when no contour passed the filters (or the crop
    # came out empty), not only when zero contours were found — the old
    # "No contours found" message was misleading.
    print("No qualifying text line found, trying line-by-line scan...")
    return scan_for_text_lines(img, skip_top)
def scan_for_text_lines(img, start_y):
    """Fallback detector: walk down the page in thin strips looking for ink.

    Examines a 25-px-tall horizontal strip every 5 px starting at
    start_y, and returns the first strip whose ink density and row
    distribution resemble a single text line.  When nothing matches,
    returns a small section from the middle of the page.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) if len(img.shape) == 3 else img
    img_height, img_width = gray.shape
    # Slide a thin strip down the page from start_y
    for y in range(start_y, img_height - 25, 5):
        strip = gray[y:y + 25, :]
        # Otsu threshold isolates the dark ink pixels in this strip
        _, thresh = cv2.threshold(strip, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        total_pixels = strip.shape[0] * strip.shape[1]
        ink_ratio = np.sum(thresh > 0) / total_pixels
        # How many of the strip's rows actually carry ink (text spreads
        # horizontally, so several consecutive rows should have some)
        rows_with_ink = np.sum(np.sum(thresh, axis=1) > 0)
        print(f"y={y}: ink_ratio={ink_ratio:.3f}, rows_with_ink={rows_with_ink}")
        # Single-line heuristics: some ink but not too much, spread over
        # a plausible number of rows
        if 0.02 < ink_ratio < 0.15 and 3 <= rows_with_ink <= 15:
            # Expand the region slightly while keeping the crop small
            y_start = max(0, y - 8)
            y_end = min(img_height, y + 33)
            source = img if len(img.shape) == 3 else gray
            extracted = source[y_start:y_end, :]
            print(f"Found text at y={y}, extracting region {y_start}:{y_end}")
            return Image.fromarray(extracted)
    # Nothing matched anywhere: hand back a 1/8-height middle section
    print("No text found, returning smaller middle section")
    mid_y = img_height // 2
    return Image.fromarray(img[mid_y:mid_y + img_height // 8, :])
def preprocess_voynich_image(img_pil):
    """Boost local contrast of a folio image for easier text inspection.

    Color images are processed in LAB space (CLAHE on the lightness
    channel only, so hues are preserved); grayscale images get CLAHE
    applied directly.

    Args:
        img_pil: PIL.Image (RGB or grayscale), or None.

    Returns:
        Contrast-enhanced PIL.Image, or None when the input is None.
    """
    if img_pil is None:
        return None
    img = np.array(img_pil)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    # Grayscale input: nothing color-related to preserve, enhance directly.
    # (The original assumed 3-channel input and crashed on grayscale.)
    if len(img.shape) != 3:
        return Image.fromarray(clahe.apply(img))
    # Color input: enhance the L (lightness) channel in LAB space so the
    # contrast boost does not distort colors.
    lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab)
    l = clahe.apply(l)
    enhanced = cv2.merge([l, a, b])
    enhanced = cv2.cvtColor(enhanced, cv2.COLOR_LAB2RGB)
    return Image.fromarray(enhanced)
def debug_voynich_detection(img_pil):
    """Visualize each stage of the text-line detection pipeline.

    Mirrors the preprocessing of find_text_lines_voynich (same 15% top
    skip, CLAHE and adaptive-threshold settings) so the panels show what
    the detector actually sees.

    Args:
        img_pil: PIL.Image of a folio, or None.

    Returns:
        Tuple of four PIL.Images: (search-area outline, enhanced search
        area, full-size threshold, final extraction result), or
        (None, None, None, None) when the input is None.
    """
    if img_pil is None:
        return None, None, None, None
    img = np.array(img_pil)
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img
    img_height, img_width = gray.shape
    # Match the detector: skip the top 15% of the page.  (This was 5%,
    # which made the debug panels show a different search area than the
    # one find_text_lines_voynich really uses.)
    skip_top = int(img_height * 0.15)
    search_area = gray[skip_top:, :]
    # Panel 1: outline the search area on a copy of the original page
    search_viz = np.copy(gray)
    cv2.rectangle(search_viz, (0, skip_top), (img_width, img_height), (128), 2)
    # Panel 2: CLAHE-enhanced search area (same settings as the detector)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(search_area)
    # Panel 3: adaptive threshold, pasted back at full-page coordinates
    thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)
    thresh_full = np.zeros_like(gray)
    thresh_full[skip_top:, :] = thresh
    # Panel 4: the actual extraction result
    result = find_text_lines_voynich(img_pil)
    return (Image.fromarray(search_viz),
            Image.fromarray(enhanced),
            Image.fromarray(thresh_full),
            result)
def extract_text_block(img_pil, start_percent=0.2, height_percent=0.4):
    """Crop a horizontal band of the image by fractional position and size.

    Args:
        img_pil: source PIL.Image, or None.
        start_percent: top of the band as a fraction of image height.
        height_percent: band height as a fraction of image height.

    Returns:
        The cropped PIL.Image band, or None when the input is None.
    """
    if img_pil is None:
        return None
    pixels = np.array(img_pil)
    total_height = pixels.shape[0]
    # Fractions -> pixel rows, clamping the bottom edge to the image
    top = int(total_height * start_percent)
    bottom = min(total_height, top + int(total_height * height_percent))
    return Image.fromarray(pixels[top:bottom, :])
def manual_extract_rectangle(img_pil, x_start_percent=0.0, y_start_percent=0.2,
                             width_percent=1.0, height_percent=0.15):
    """Crop a user-specified rectangle given as fractions of image size.

    Args:
        img_pil: source PIL.Image, or None.
        x_start_percent, y_start_percent: top-left corner as fractions.
        width_percent, height_percent: rectangle size as fractions.

    Returns:
        The cropped PIL.Image, or None when the input is None or the
        resulting crop is empty.
    """
    if img_pil is None:
        return None
    img = np.array(img_pil)
    img_height, img_width = img.shape[:2]
    # Fractions -> pixels, clamped so the rectangle stays on the page
    x_start = min(max(0, int(img_width * x_start_percent)), img_width - 1)
    y_start = min(max(0, int(img_height * y_start_percent)), img_height - 1)
    x_end = min(img_width, x_start + int(img_width * width_percent))
    y_end = min(img_height, y_start + int(img_height * height_percent))
    rectangle = img[y_start:y_end, x_start:x_end]
    print(f"Manual extract: x={x_start}:{x_end}, y={y_start}:{y_end}, size={rectangle.shape}")
    return Image.fromarray(rectangle) if rectangle.size > 0 else None
def show_rectangle_preview(img_pil, x_start_percent=0.0, y_start_percent=0.2,
                           width_percent=1.0, height_percent=0.15):
    """Render the selection rectangle on a copy of the image.

    Draws a red outline plus a faint green fill over the region that
    manual_extract_rectangle would crop with the same parameters.

    Args:
        img_pil: source PIL.Image, or None.
        x_start_percent, y_start_percent: top-left corner as fractions.
        width_percent, height_percent: rectangle size as fractions.

    Returns:
        Annotated PIL.Image preview, or None when the input is None.
    """
    if img_pil is None:
        return None
    img = np.array(img_pil)
    img_height, img_width = img.shape[:2]
    # Same fraction -> pixel conversion and clamping as the extractor
    x_start = min(max(0, int(img_width * x_start_percent)), img_width - 1)
    y_start = min(max(0, int(img_height * y_start_percent)), img_height - 1)
    x_end = min(img_width, x_start + int(img_width * width_percent))
    y_end = min(img_height, y_start + int(img_height * height_percent))
    preview = np.copy(img)
    # Red outline marks the selection boundary
    cv2.rectangle(preview, (x_start, y_start), (x_end, y_end), (255, 0, 0), 2)
    # Blend in a translucent green fill so the selected area stands out
    overlay = np.copy(preview)
    cv2.rectangle(overlay, (x_start, y_start), (x_end, y_end), (0, 255, 0), -1)
    preview = cv2.addWeighted(preview, 0.8, overlay, 0.2, 0)
    return Image.fromarray(preview)
# Enhanced Gradio interface: two workflows (automatic detection and a
# manual rectangle selector) share one upload component and one output
# column.  NOTE(review): component creation order determines layout, so
# do not reorder the constructors below.
with gr.Blocks(title="Voynich Text Line Extractor - Single Line Focus") as demo:
    gr.Markdown("# Voynich Text Line Extractor - Single Line Focus")
    gr.Markdown("This version is optimized to extract single text lines with tighter bounding boxes.")
    with gr.Row():
        with gr.Column():
            # Shared input image used by every handler below
            input_image = gr.Image(type="pil", label="Upload Voynich Folio")
            with gr.Tab("Auto Extract"):
                enhance_btn = gr.Button("Enhance Image")
                extract_btn = gr.Button("Find Text Lines")
                block_btn = gr.Button("Extract Text Block")
                debug_btn = gr.Button("Debug Detection")
                # Sliders controlling extract_text_block's region
                start_slider = gr.Slider(0.1, 0.8, 0.2, label="Start Position (% from top)")
                height_slider = gr.Slider(0.1, 0.6, 0.4, label="Block Height (% of image)")
            with gr.Tab("Manual Rectangle"):
                gr.Markdown("### Manual Rectangle Selection")
                gr.Markdown("Adjust the sliders to manually select a rectangular region")
                # Rectangle geometry as fractions of the image size;
                # height_slider_manual is intentionally distinct from the
                # Auto Extract tab's height_slider.
                x_start_slider = gr.Slider(0.0, 0.9, 0.0, step=0.01, label="X Start (% from left)")
                y_start_slider = gr.Slider(0.0, 0.9, 0.2, step=0.01, label="Y Start (% from top)")
                width_slider = gr.Slider(0.1, 1.0, 1.0, step=0.01, label="Width (% of image)")
                height_slider_manual = gr.Slider(0.05, 0.5, 0.15, step=0.01, label="Height (% of image)")
                preview_btn = gr.Button("Preview Rectangle")
                extract_manual_btn = gr.Button("Extract Rectangle")
        with gr.Column():
            # line_output is shared by auto extract, block extract, and
            # manual extract handlers
            enhanced_output = gr.Image(label="Enhanced Image")
            line_output = gr.Image(label="Extracted Text")
            preview_output = gr.Image(label="Rectangle Preview")
    with gr.Row():
        # Four-panel view produced by debug_voynich_detection
        debug_search = gr.Image(label="1. Search Area")
        debug_enhanced = gr.Image(label="2. Enhanced")
        debug_thresh = gr.Image(label="3. Threshold")
        debug_result = gr.Image(label="4. Result")
    # Auto extract button handlers
    enhance_btn.click(
        fn=preprocess_voynich_image,
        inputs=input_image,
        outputs=enhanced_output
    )
    extract_btn.click(
        fn=find_text_lines_voynich,
        inputs=input_image,
        outputs=line_output
    )
    block_btn.click(
        fn=extract_text_block,
        inputs=[input_image, start_slider, height_slider],
        outputs=line_output
    )
    debug_btn.click(
        fn=debug_voynich_detection,
        inputs=input_image,
        outputs=[debug_search, debug_enhanced, debug_thresh, debug_result]
    )
    # Manual rectangle handlers
    preview_btn.click(
        fn=show_rectangle_preview,
        inputs=[input_image, x_start_slider, y_start_slider, width_slider, height_slider_manual],
        outputs=preview_output
    )
    extract_manual_btn.click(
        fn=manual_extract_rectangle,
        inputs=[input_image, x_start_slider, y_start_slider, width_slider, height_slider_manual],
        outputs=line_output
    )
    # Auto-update the preview whenever any rectangle slider changes
    for slider in [x_start_slider, y_start_slider, width_slider, height_slider_manual]:
        slider.change(
            fn=show_rectangle_preview,
            inputs=[input_image, x_start_slider, y_start_slider, width_slider, height_slider_manual],
            outputs=preview_output
        )
if __name__ == "__main__":
    demo.launch()