# Source: app.py by mohamed12ahmed
# Commit a2e1f84 (verified): "Update app.py"
import gradio as gr
import cv2
import numpy as np
import pytesseract
#pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
pytesseract.pytesseract.tesseract_cmd = "tesseract"
from PIL import Image
from docx import Document
import io
# ---------- Step 1: Image preprocessing ----------
def _find_document_contour(gray):
    """Return the first 4-vertex outline among the five largest contours.

    Args:
        gray: Single-channel image array.

    Returns:
        The polygon approximation (as produced by ``cv2.approxPolyDP``) of
        the first contour that simplifies to four vertices, or ``None`` when
        none of the five largest contours does.
    """
    # Smooth before edge detection so fewer spurious edges are produced.
    smoothed = cv2.bilateralFilter(gray, 11, 17, 17)
    edges = cv2.Canny(smoothed, 30, 200)
    contours, _ = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    largest_first = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
    for contour in largest_first:
        perimeter = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
        if len(approx) == 4:
            return approx
    return None


def _order_corners(pts):
    """Order four (x, y) points as top-left, top-right, bottom-right, bottom-left.

    The top-left point has the smallest x + y and the bottom-right the
    largest; the top-right has the smallest y - x and the bottom-left the
    largest.
    """
    rect = np.zeros((4, 2), dtype="float32")
    sums = pts.sum(axis=1)
    rect[0] = pts[np.argmin(sums)]
    rect[2] = pts[np.argmax(sums)]
    diffs = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diffs)]
    rect[3] = pts[np.argmax(diffs)]
    return rect


def preprocess_image(image):
    """Detect a four-cornered document in *image* and flatten its perspective.

    Args:
        image: The picture to process. Expected to be a PIL image; anything
            ``np.array`` can turn into an RGB(A) pixel array is also accepted.

    Returns:
        A new PIL image containing the perspective-corrected document, or
        *image* itself, unchanged, when no usable quadrilateral is found.
    """
    # COLOR_RGB2GRAY needs 3 or 4 channels; grayscale or palette PIL images
    # would otherwise make cvtColor fail, so normalise those to RGB first.
    source = image
    mode = getattr(image, "mode", None)
    if mode is not None and mode not in ("RGB", "RGBA"):
        source = image.convert("RGB")
    pixels = np.array(source)

    gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)
    doc_contour = _find_document_contour(gray)
    if doc_contour is None:
        return image

    rect = _order_corners(doc_contour.reshape(4, 2))
    # The sum/diff ordering can pick the same vertex for two corners (e.g. a
    # diamond-shaped quad); the transform would then be singular, so skip it.
    if len({tuple(corner) for corner in rect.tolist()}) < 4:
        return image

    tl, tr, br, bl = rect
    max_width = int(max(np.linalg.norm(br - bl), np.linalg.norm(tr - tl)))
    max_height = int(max(np.linalg.norm(tr - br), np.linalg.norm(tl - bl)))
    # With a side shorter than 2 px the destination corners coincide and the
    # transform is degenerate.
    if max_width < 2 or max_height < 2:
        return image

    dst = np.array(
        [
            [0, 0],
            [max_width - 1, 0],
            [max_width - 1, max_height - 1],
            [0, max_height - 1],
        ],
        dtype="float32",
    )
    matrix = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(pixels, matrix, (max_width, max_height))
    return Image.fromarray(warped)
# ---------- Step 2: OCR & Word export ----------
def extract_text(image):
    """Run OCR on *image* and package the recognised text as a Word document.

    The image is first passed through ``preprocess_image``; Tesseract is then
    invoked with ``lang="eng+ara"``.

    Args:
        image: The picture to read, forwarded to ``preprocess_image``.

    Returns:
        A pair ``(text, (buffer, filename))``: ``text`` is the raw OCR
        output, ``buffer`` is an ``io.BytesIO`` rewound to position 0 that
        holds the saved .docx, and ``filename`` is ``"document_output.docx"``.
    """
    cleaned = preprocess_image(image)
    ocr_text = pytesseract.image_to_string(cleaned, lang="eng+ara")

    # Build the DOCX with one paragraph per non-blank OCR line.
    word_doc = Document()
    non_blank_rows = [row for row in ocr_text.splitlines() if row.strip()]
    for row in non_blank_rows:
        word_doc.add_paragraph(row)

    # Serialise in memory and rewind so the caller can read from the start.
    docx_bytes = io.BytesIO()
    word_doc.save(docx_bytes)
    docx_bytes.seek(0)
    return ocr_text, (docx_bytes, "document_output.docx")
# ---------- Step 3: Gradio UI ----------
def process_image(image):
    """Gradio callback: OCR the uploaded image and expose a .docx download.

    Args:
        image: The uploaded picture, or ``None`` when the button is pressed
            before anything has been uploaded.

    Returns:
        A pair ``(text, path)``. ``text`` is the extracted text and ``path``
        is the location of the generated .docx on disk. For a missing image
        the pair is ``("", None)`` so both outputs are simply cleared.
    """
    import os
    import tempfile

    if image is None:
        return "", None

    text, (buffer, filename) = extract_text(image)

    # The File output component takes a filesystem path, not an in-memory
    # buffer, so persist the document. A dedicated temp directory lets the
    # download keep its friendly file name; the file must outlive this call.
    out_dir = tempfile.mkdtemp(prefix="ocr_docx_")
    out_path = os.path.join(out_dir, os.path.basename(filename))
    with open(out_path, "wb") as handle:
        handle.write(buffer.getvalue())
    return text, out_path
# Gradio page: an upload row, a results row (text + downloadable .docx) and a
# button row that triggers ``process_image``.
# NOTE(review): the nesting below was reconstructed from a paste that had lost
# its indentation — confirm the row layout against the deployed app.
with gr.Blocks(title="AI Document OCR (Light Version)") as demo:
    # Page header; the leading emoji had been mis-decoded into mojibake.
    gr.Markdown("## 📄 AI Document OCR (Light Version)\nUpload a scanned or skewed document, and the model will correct it and extract formatted text.")
    with gr.Row():
        # type="pil" makes Gradio hand the callback a PIL image.
        input_image = gr.Image(type="pil", label="Upload Document")
    with gr.Row():
        output_text = gr.Textbox(label="Extracted Text", lines=15)
        output_file = gr.File(label="Download as .docx")
    with gr.Row():
        submit_btn = gr.Button("Extract Text")
    # Event listeners are registered while the Blocks context is still open.
    submit_btn.click(process_image, inputs=input_image, outputs=[output_text, output_file])

demo.launch()