kambris committed on
Commit
cc4d08b
·
verified ·
1 Parent(s): 737e5f8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -0
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+ from PIL import Image
5
+
6
def extract_first_line(img_pil):
    """Crop the topmost detected line of text out of a scanned page.

    The page is binarised with an adaptive threshold, the ink is dilated
    horizontally so the glyphs of one line fuse into a single blob, and the
    highest blob that is wide and tall enough is cropped (plus a margin) from
    the page.

    Args:
        img_pil: The page as a PIL image (a NumPy array is also accepted),
            or None.

    Returns:
        A PIL image containing the cropped line. When no line-like region is
        found the input is returned unchanged, and None is returned for a
        None input.
    """
    if img_pil is None:
        return None

    # Palette, bilevel, 16-bit and similar PIL modes do not produce the 8-bit
    # gray / RGB(A) array the OpenCV calls below expect, so normalise them to
    # RGB. RGB, RGBA and L pass through untouched so the crop keeps its mode.
    # getattr keeps plain arrays (which have no ``mode``) working as before.
    mode = getattr(img_pil, "mode", None)
    if mode is not None and mode not in ("RGB", "RGBA", "L"):
        img = np.array(img_pil.convert("RGB"))
    else:
        img = np.array(img_pil)

    # A 3-D array is colour data in RGB(A) order; a 2-D array is already gray.
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) if img.ndim == 3 else img

    # Adaptive (local) thresholding copes with uneven lighting; the inverted
    # output makes ink white so it can be dilated and traced as contours.
    thresh = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )

    # Wide, short kernel: joins glyphs along a line without bridging the gap
    # to the lines above and below.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 3))
    dilated = cv2.dilate(thresh, kernel, iterations=1)

    # findContours returns (contours, hierarchy) in OpenCV 4 and
    # (image, contours, hierarchy) in OpenCV 3; [-2] is the contour list in both.
    contours = cv2.findContours(
        dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    # Discard specks: a line must span at least 10% of the page width and be
    # at least 5 px tall.
    min_width = img.shape[1] // 10
    min_height = 5
    boxes = (cv2.boundingRect(contour) for contour in contours)
    candidates = [
        box for box in boxes if box[2] >= min_width and box[3] >= min_height
    ]

    if candidates:
        # Smallest y coordinate == first visible line on the page.
        x, y, w, h = min(candidates, key=lambda box: box[1])

        # Pad the box, clamped to the page bounds.
        margin = 15
        page_height, page_width = img.shape[:2]
        top = max(0, y - margin)
        bottom = min(page_height, y + h + margin)
        left = max(0, x - margin)
        right = min(page_width, x + w + margin)

        crop = img[top:bottom, left:right]
        if crop.size > 0:
            return Image.fromarray(crop)

    # Fallback: nothing line-like was detected, hand back the original image.
    return img_pil
64
+
65
def preprocess_voynich_image(img_pil):
    """Boost the local contrast of a page image with CLAHE.

    Colour images are converted to LAB, CLAHE is applied to the lightness
    channel only, and the result is converted back to RGB. Single-channel
    (grayscale) images are equalised directly.

    Args:
        img_pil: The page as a PIL image (a NumPy array is also accepted),
            or None.

    Returns:
        A new PIL image with enhanced contrast (RGB for colour input,
        grayscale for grayscale input), or None for a None input.
    """
    if img_pil is None:
        return None

    # Palette, bilevel, 16-bit and similar PIL modes do not yield an 8-bit
    # gray / RGB(A) array, so normalise them to RGB before handing the data
    # to OpenCV. getattr keeps plain arrays (no ``mode``) working as before.
    mode = getattr(img_pil, "mode", None)
    if mode is not None and mode not in ("RGB", "RGBA", "L"):
        img = np.array(img_pil.convert("RGB"))
    else:
        img = np.array(img_pil)

    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    if img.ndim == 2:
        # Grayscale page: there is no colour to protect, and COLOR_RGB2LAB
        # would reject a single-channel array, so equalise the plane itself.
        return Image.fromarray(clahe.apply(img))

    # Equalise lightness only so the colour channels are left untouched.
    lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    lightness, chroma_a, chroma_b = cv2.split(lab)
    lightness = clahe.apply(lightness)
    enhanced = cv2.cvtColor(
        cv2.merge([lightness, chroma_a, chroma_b]), cv2.COLOR_LAB2RGB
    )

    return Image.fromarray(enhanced)
81
+
82
+ # Create interface with preprocessing option
83
# NOTE(review): the nesting below is reconstructed from statement order because
# the original indentation was lost; confirm the layout against the running app.
with gr.Blocks(title="Voynich Manuscript Line Extractor") as demo:
    # Page heading and short usage note.
    gr.Markdown("# Voynich Manuscript Line Extractor")
    gr.Markdown("Upload a scanned folio of the Voynich manuscript. The app will detect and crop the first visible line of text.")

    with gr.Row():
        # Left column: the upload widget and the two action buttons.
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload Voynich Folio")
            enhance_btn = gr.Button("Enhance Image First")
            extract_btn = gr.Button("Extract First Line")

        # Right column: one result panel per button.
        with gr.Column():
            enhanced_output = gr.Image(label="Enhanced Image")
            line_output = gr.Image(label="Extracted First Line")

    # Both handlers read the uploaded image; each writes to its own panel.
    enhance_btn.click(preprocess_voynich_image, inputs=input_image, outputs=enhanced_output)
    extract_btn.click(extract_first_line, inputs=input_image, outputs=line_output)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()