Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import cv2
|
| 3 |
+
import numpy as np
|
| 4 |
+
from PIL import Image
|
| 5 |
+
|
| 6 |
+
def extract_first_line(img_pil):
    """Detect and crop the first (topmost) line of text in a manuscript scan.

    Parameters
    ----------
    img_pil : PIL.Image.Image | None
        Uploaded folio image. Any PIL mode is accepted; RGBA/palette images
        are normalized to RGB internally (the original code crashed on them,
        since COLOR_RGB2BGR requires exactly 3 channels).

    Returns
    -------
    PIL.Image.Image | None
        A padded crop around the topmost detected text line; the original
        image unchanged if no plausible line is found; None if no image
        was supplied.
    """
    if img_pil is None:
        return None

    # Normalize exotic PIL modes (RGBA, P, CMYK, ...) to RGB up front so the
    # cv2 color conversions below always see a supported channel count.
    if img_pil.mode not in ("RGB", "L"):
        img_pil = img_pil.convert("RGB")
    img = np.array(img_pil)

    # Grayscale copy for thresholding; `img` keeps the colors for the crop.
    # (RGB -> GRAY directly; the old RGB -> BGR -> GRAY round-trip was
    # redundant.)
    if img.ndim == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img

    # Adaptive thresholding copes with the uneven lighting and fading typical
    # of manuscript scans better than one global threshold would.
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)

    # Dilate with a wide-but-short kernel so the glyphs of one text line merge
    # into a single blob without fusing vertically adjacent lines.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 3))
    dilated = cv2.dilate(thresh, kernel, iterations=1)

    # Each external contour of the dilated mask is a candidate text line.
    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Reject specks/noise: a real line should span a reasonable area.
    min_width = img.shape[1] // 10   # at least 10% of the image width
    min_height = 5                   # minimum height in pixels

    valid_boxes = [
        (x, y, w, h)
        for (x, y, w, h) in (cv2.boundingRect(c) for c in contours)
        if w >= min_width and h >= min_height
    ]

    if valid_boxes:
        # Topmost bounding box == first visible line on the page.
        x, y, w, h = min(valid_boxes, key=lambda box: box[1])

        # Pad the crop a little, clamped to the image bounds.
        margin = 15
        y_start = max(0, y - margin)
        y_end = min(img.shape[0], y + h + margin)
        x_start = max(0, x - margin)
        x_end = min(img.shape[1], x + w + margin)

        crop = img[y_start:y_end, x_start:x_end]
        if crop.size > 0:
            return Image.fromarray(crop)

    # Fallback: nothing detected — hand the caller back the original image.
    return img_pil
|
| 64 |
+
|
| 65 |
+
def preprocess_voynich_image(img_pil):
    """Contrast-enhance a manuscript scan for faded ink.

    Applies CLAHE (Contrast-Limited Adaptive Histogram Equalization) to the
    lightness channel in LAB space, so local contrast improves while hues
    are left untouched.

    Parameters
    ----------
    img_pil : PIL.Image.Image | None
        Uploaded folio image. Any PIL mode is accepted; it is normalized to
        RGB first (the original code crashed on grayscale or RGBA uploads,
        because COLOR_RGB2LAB requires exactly 3 channels).

    Returns
    -------
    PIL.Image.Image | None
        The enhanced RGB image, or None if no image was supplied.
    """
    if img_pil is None:
        return None

    # Normalize to 3-channel RGB so the LAB conversion below cannot fail on
    # grayscale ("L"), RGBA, or palette-mode inputs.
    if img_pil.mode != "RGB":
        img_pil = img_pil.convert("RGB")
    img = np.array(img_pil)

    # Equalize only L (lightness); A/B (color) channels pass through as-is.
    lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    l = clahe.apply(l)
    enhanced = cv2.merge([l, a, b])
    enhanced = cv2.cvtColor(enhanced, cv2.COLOR_LAB2RGB)

    return Image.fromarray(enhanced)
|
| 81 |
+
|
| 82 |
+
# Create interface with preprocessing option
|
| 83 |
+
# --- Gradio UI ---------------------------------------------------------------
# Two-column layout: upload + action buttons on the left, result panes on the
# right. `demo` must stay at module level under this exact name — Hugging Face
# Spaces discovers the app object by it.
with gr.Blocks(title="Voynich Manuscript Line Extractor") as demo:
    gr.Markdown("# Voynich Manuscript Line Extractor")
    gr.Markdown("Upload a scanned folio of the Voynich manuscript. The app will detect and crop the first visible line of text.")

    with gr.Row():
        with gr.Column():
            # Input column: one upload widget shared by both actions.
            input_image = gr.Image(type="pil", label="Upload Voynich Folio")
            enhance_btn = gr.Button("Enhance Image First")
            extract_btn = gr.Button("Extract First Line")

        with gr.Column():
            # Output column: each button writes to its own result pane.
            enhanced_output = gr.Image(label="Enhanced Image")
            line_output = gr.Image(label="Extracted First Line")

    # NOTE(review): both handlers read the raw `input_image` — despite the
    # button label, "Enhance Image First" does NOT feed its result into the
    # extraction step. Confirm this is the intended workflow.
    enhance_btn.click(
        fn=preprocess_voynich_image,
        inputs=input_image,
        outputs=enhanced_output
    )

    extract_btn.click(
        fn=extract_first_line,
        inputs=input_image,
        outputs=line_output
    )

# Launch a local server only when run as a script (Spaces imports the module
# and serves `demo` itself).
if __name__ == "__main__":
    demo.launch()
|