Spaces:
Sleeping
Sleeping
File size: 8,706 Bytes
import os
import subprocess
import cv2 as cv # Ensure OpenCV is installed
import numpy as np
import pytesseract
from pdf2image import convert_from_path
import gradio as gr
import json
from PIL import Image
# Ensure poppler-utils and tesseract-ocr are installed
def install_dependencies():
    """Run ``setup.sh`` through bash and echo what it wrote to stdout.

    Raises:
        subprocess.CalledProcessError: Re-raised, after the script's stderr
            has been printed, when the script exits with a non-zero status.
    """
    command = ["bash", "setup.sh"]
    try:
        completed = subprocess.run(
            command,
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as exc:
        print(f"An error occurred while installing dependencies: {exc.stderr}")
        raise
    else:
        print(completed.stdout)
# Run the dependency setup once at import time, before anything else below executes.
install_dependencies()
# Function to rescale the frame
def rescale_frame(frame, scale=0.75):
    """Return ``frame`` resized by ``scale`` along both axes.

    The target size is derived from ``frame.shape`` (rows first, columns
    second), each dimension truncated to an integer, and the resize uses
    ``cv.INTER_AREA`` interpolation.
    """
    rows, cols = frame.shape[0], frame.shape[1]
    target_size = (int(cols * scale), int(rows * scale))  # (width, height)
    return cv.resize(frame, target_size, interpolation=cv.INTER_AREA)
# Image Analysis
def analyze_image(image):
    """Measure the quality metrics that drive the adaptive preprocessing.

    The image is converted to grayscale with ``cv.COLOR_BGR2GRAY`` and the
    following values are computed on the grayscale result:

    * ``mean_brightness`` -- mean pixel value.
    * ``contrast`` -- standard deviation of the pixel values.
    * ``noise`` -- variance of the Laplacian.
    * ``skew_angle`` -- whatever ``detect_skew`` returns for the image.

    Returns:
        dict: The four metrics, keyed by the names listed above.
    """
    grayscale = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    laplacian = cv.Laplacian(grayscale, cv.CV_64F)
    return {
        'mean_brightness': np.mean(grayscale),
        'contrast': grayscale.std(),
        'noise': laplacian.var(),
        'skew_angle': detect_skew(grayscale),
    }
def detect_skew(image):
    """Estimate a skew angle from the non-zero pixels of ``image``.

    The (row, column) coordinates of every pixel greater than zero are fitted
    with ``cv.minAreaRect``; the rectangle's angle is then folded: values
    below -45 become ``-(90 + angle)``, everything else is negated.

    Args:
        image: Array whose entries are compared against zero.

    Returns:
        The folded angle, or ``0.0`` when no pixel is greater than zero.
    """
    coords = np.column_stack(np.where(image > 0))
    if coords.size == 0:
        # No pixels to fit a rectangle around: report "no skew" instead of
        # handing an empty point set to OpenCV.
        return 0.0
    angle = cv.minAreaRect(coords)[-1]
    # NOTE(review): the folding below assumes minAreaRect reports angles in
    # [-90, 0); the reported range appears to differ between OpenCV
    # releases -- confirm against the installed version.
    if angle < -45:
        return -(90 + angle)
    return -angle
# Adaptive Preprocessing Pipeline
def preprocess_image_adaptive(image):
    """Apply only the corrections the image needs, then binarize it.

    The metrics from ``analyze_image`` are measured once on the incoming
    image and select the corrections, which run in this order:

    * mean brightness below 50 -> ``adjust_brightness`` with factor 1.5
    * contrast below 50        -> ``adjust_contrast`` with factor 1.5
    * noise above 1000         -> ``reduce_noise``
    * |skew angle| above 5     -> ``deskew`` by the measured angle

    Returns:
        The result of Gaussian adaptive thresholding (block size 11,
        constant 2) applied to the grayscale version of the corrected image.
    """
    metrics = analyze_image(image)

    # (is the correction needed?, how to apply it) -- order matters.
    corrections = (
        (metrics['mean_brightness'] < 50, lambda img: adjust_brightness(img, 1.5)),
        (metrics['contrast'] < 50, lambda img: adjust_contrast(img, 1.5)),
        (metrics['noise'] > 1000, reduce_noise),
        (abs(metrics['skew_angle']) > 5, lambda img: deskew(img, metrics['skew_angle'])),
    )
    for needed, apply_fix in corrections:
        if needed:
            image = apply_fix(image)

    grayscale = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    return cv.adaptiveThreshold(
        grayscale, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY, 11, 2
    )
def adjust_brightness(image, factor):
    """Scale ``image`` by ``factor`` with ``cv.convertScaleAbs`` (zero offset)."""
    gain, offset = factor, 0
    return cv.convertScaleAbs(image, alpha=gain, beta=offset)
def adjust_contrast(image, alpha):
    """Scale ``image`` by ``alpha`` with ``cv.convertScaleAbs`` (zero offset)."""
    scale_args = {"alpha": alpha, "beta": 0}
    return cv.convertScaleAbs(image, **scale_args)
def reduce_noise(image):
    """Denoise a colour image with ``cv.fastNlMeansDenoisingColored``.

    Uses a strength of 30 for both strength arguments, a template window of 7
    and a search window of 21.
    """
    luminance_strength = 30
    colour_strength = 30
    template_window = 7
    search_window = 21
    return cv.fastNlMeansDenoisingColored(
        image, None, luminance_strength, colour_strength, template_window, search_window
    )
def deskew(image, angle):
    """Rotate ``image`` by ``angle`` about its centre, keeping the same size.

    The rotation uses cubic interpolation and replicates border pixels into
    the areas uncovered by the rotation.
    """
    height, width = image.shape[:2]
    pivot = (width // 2, height // 2)
    rotation = cv.getRotationMatrix2D(pivot, angle, 1.0)
    return cv.warpAffine(
        image,
        rotation,
        (width, height),
        flags=cv.INTER_CUBIC,
        borderMode=cv.BORDER_REPLICATE,
    )
def convert_to_pil(image):
    """Convert an OpenCV-style array into a PIL image.

    Args:
        image: Array to convert, or ``None``. Two-dimensional arrays (and
            three-dimensional arrays with a single trailing channel) are
            treated as grayscale; any other array is passed through
            ``cv.COLOR_BGR2RGB`` before conversion.

    Returns:
        A PIL image, or ``None`` when ``image`` is ``None`` or has no
        elements.
    """
    if image is None or image.size == 0:
        print("Error: Empty image passed to convert_to_pil")
        return None
    print("Converting image to PIL format")
    # Ensure the array is in uint8 format
    if image.dtype != np.uint8:
        image = image.astype(np.uint8)
    if image.ndim == 3 and image.shape[2] == 1:
        # Collapse an explicit single channel so it is handled as grayscale.
        image = image[:, :, 0]
    if image.ndim == 2:
        # The binarized output of the preprocessing step has one channel;
        # a BGR->RGB conversion needs colour input, so build the PIL image
        # directly from the grayscale data.
        return Image.fromarray(image)
    return Image.fromarray(cv.cvtColor(image, cv.COLOR_BGR2RGB))
def extract_text_from_image(image, langs='tel+osd+eng'):
    """OCR ``image`` with Tesseract and return the recognised text.

    Args:
        image: Array handed to ``convert_to_pil`` before recognition.
        langs: Tesseract language string passed as ``lang``.

    Returns:
        The recognised text, or an empty string when the image could not be
        converted or Tesseract raised a ``TesseractError``.
    """
    pil_image = convert_to_pil(image)
    if pil_image is None:
        print("Error: Failed to convert image to PIL format")
        return ""

    try:
        recognised = pytesseract.image_to_string(
            pil_image, lang=langs, config=r'--oem 3 --psm 6'
        )
    except pytesseract.TesseractError as exc:
        print(f"Tesseract error: {exc}")
        recognised = ""
    return recognised
def process_image(img):
    """Preprocess ``img`` and OCR the result.

    Returns an empty string when preprocessing yields ``None``; otherwise the
    text returned by ``extract_text_from_image``.
    """
    prepared = preprocess_image_adaptive(img)
    return "" if prepared is None else extract_text_from_image(prepared)
# Directory that receives the exported JSON file; created here if missing.
output_dir = "output"
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(output_dir, exist_ok=True)
# Saved page texts, keyed by "Page number: N"; rewritten to disk on every save.
all_texts = {}
def save_and_next(page_num, text, extracted_texts, original_images, total_pages):
    """Store the edited text of the current page and advance to the next one.

    The non-blank lines of ``text`` are recorded in the module-level
    ``all_texts`` under ``"Page number: <n>"`` and the whole mapping is
    rewritten to ``all_texts.json`` inside ``output_dir``.

    Args:
        page_num: Current page number (converted with ``int``).
        text: Text to save for the current page.
        extracted_texts: List that receives the OCR text of the next page.
        original_images: Page images, indexed by page number minus one.
        total_pages: Number of pages (converted with ``int``).

    Returns:
        A 4-tuple of text update, page number, image update and JSON path.
        When there is no next page the tuple is
        ``("All pages processed", <current page>, None, <json path>)``.
    """
    current_page = int(page_num)
    page_count = int(total_pages)

    kept_lines = [line for line in text.split('\n') if line.strip() != '']
    all_texts.update({f"Page number: {current_page}": {"Content": kept_lines}})

    json_path = os.path.join(output_dir, "all_texts.json")
    with open(json_path, 'w', encoding='utf-8') as handle:
        json.dump(all_texts, handle, ensure_ascii=False, indent=4)

    upcoming = current_page + 1
    if upcoming > page_count:
        return "All pages processed", current_page, None, json_path

    upcoming_image = original_images[upcoming - 1]
    upcoming_text = process_image(upcoming_image)
    extracted_texts.append(upcoming_text)
    return (
        gr.update(value=upcoming_text),
        upcoming,
        gr.update(value=upcoming_image, height=None, width=None),
        json_path,
    )
def skip_page(page_num, extracted_texts, original_images, total_pages):
    """Advance to the next page without saving the current one.

    Args:
        page_num: Current page number (converted with ``int``).
        extracted_texts: List that receives the OCR text of the next page.
        original_images: Page images, indexed by page number minus one.
        total_pages: Number of pages (converted with ``int``).

    Returns:
        A 3-tuple of text update, page number and image update. When there is
        no next page the tuple is ``("All pages processed", page_num, None)``
        with ``page_num`` exactly as it was passed in.
    """
    upcoming = int(page_num) + 1
    page_count = int(total_pages)

    if upcoming > page_count:
        # Past the last page: hand the caller's page_num back untouched.
        return "All pages processed", page_num, None

    upcoming_image = original_images[upcoming - 1]
    upcoming_text = process_image(upcoming_image)
    extracted_texts.append(upcoming_text)
    return (
        gr.update(value=upcoming_text),
        upcoming,
        gr.update(value=upcoming_image, height=None, width=None),
    )
def upload_pdf(pdf):
    """Convert an uploaded PDF to page images and OCR the first page.

    Every return path yields six values in the same order: page image,
    extracted text, page number, list of extracted texts, list of page
    images, total page count. Error paths put the message in the text
    position and ``None`` in the image position.

    Args:
        pdf: The uploaded file, either an object with a ``name`` attribute
            holding the path or the path itself. ``None`` is reported as an
            error.

    Returns:
        tuple: The six values described above.
    """
    if pdf is None:
        print("Error: No PDF provided")
        return None, "Error: No PDF provided", 0, [], [], 0

    # Accept both a file wrapper exposing ``.name`` and a plain path.
    pdf_path = getattr(pdf, "name", pdf)
    pages = convert_from_path(pdf_path)
    if not pages:
        print("Error: No pages found in PDF")
        return None, "Error: No pages found in PDF", 0, [], [], 0
    print(f"PDF converted to {len(pages)} images")

    # Convert every page once; the first entry doubles as the preview image.
    original_images = [np.array(page) for page in pages]
    first_page = original_images[0]
    if first_page.size == 0:
        print("Error: First page is empty")
        return None, "Error: First page is empty", 0, [], [], 0

    text = process_image(first_page)
    extracted_texts = [text]
    return (
        gr.update(value=first_page, height=None, width=None),
        gr.update(value=text),
        1,
        extracted_texts,
        original_images,
        len(pages),
    )
def navigate_to_page(page_num, extracted_texts, original_images):
    """Show the image and stored text for ``page_num``.

    Args:
        page_num: 1-based page number (converted with ``int``).
        extracted_texts: OCR texts collected so far; may be shorter than
            ``original_images`` or ``None``.
        original_images: Page images, indexed by page number minus one; may
            be ``None``.

    Returns:
        A 3-tuple of image update, text update and page number. For a page
        number outside the available images the image is left unchanged and
        the text is set to ``"Invalid Page Number"``.
    """
    page_num = int(page_num)  # Ensure page_num is an integer
    index = page_num - 1
    images = original_images or []
    texts = extracted_texts or []

    if not 0 <= index < len(images):
        # The message belongs in the text position, not the image position.
        return gr.update(), gr.update(value="Invalid Page Number"), page_num

    # Texts are appended as pages are processed, so a page may have an image
    # but no stored text yet; show an empty editor rather than raising.
    page_text = texts[index] if index < len(texts) else ""
    return (
        gr.update(value=images[index], height=None, width=None),
        gr.update(value=page_text),
        page_num,
    )
def display_pdf_and_text():
    """Build the Gradio Blocks UI for reviewing OCR output page by page.

    The UI holds a PDF upload, the page image next to an editable text box,
    a page-number field, "Save and Next" / "Skip" buttons, a JSON download
    and a "Go to Page" button that jumps to the page typed into the
    page-number field.

    Returns:
        The assembled Blocks object (not launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("## PDF Viewer and Text Editor")
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        # NOTE(review): the row membership below is reconstructed -- confirm
        # that only the image and the text editor share the row.
        with gr.Row():
            image_output = gr.Image(label="Page Image", type="numpy")
            text_editor = gr.Textbox(label="Extracted Text", lines=10, interactive=True)
        page_num = gr.Number(value=1, label="Page Number", visible=True)

        # Per-session values populated by upload_pdf.
        extracted_texts = gr.State()
        original_images = gr.State()
        total_pages = gr.State()

        save_next_button = gr.Button("Save and Next")
        skip_button = gr.Button("Skip")
        json_output = gr.File(label="Download JSON")
        go_button = gr.Button("Go to Page")

        pdf_input.upload(
            upload_pdf,
            inputs=pdf_input,
            outputs=[image_output, text_editor, page_num, extracted_texts, original_images, total_pages],
        )
        save_next_button.click(
            fn=save_and_next,
            inputs=[page_num, text_editor, extracted_texts, original_images, total_pages],
            outputs=[text_editor, page_num, image_output, json_output],
        )
        skip_button.click(
            fn=skip_page,
            inputs=[page_num, extracted_texts, original_images, total_pages],
            outputs=[text_editor, page_num, image_output],
        )
        # Event inputs must be components and listeners must be attached while
        # the Blocks is being built, so navigation reads the page-number field
        # through one button instead of per-page buttons created in a callback
        # with a raw integer as input.
        go_button.click(
            fn=navigate_to_page,
            inputs=[page_num, extracted_texts, original_images],
            outputs=[image_output, text_editor, page_num],
        )
    return demo
# Build the Blocks app and start serving it.
iface = display_pdf_and_text()
iface.launch()
|