import gradio as gr
import cv2
import numpy as np
import pytesseract
from PIL import Image
from docx import Document
import io
import tempfile

# Path to the tesseract binary; assumes it is on $PATH.
pytesseract.pytesseract.tesseract_cmd = "tesseract"


# ---------- Step 1: Image preprocessing ----------
def preprocess_image(image):
    """Detect a document's quadrilateral outline and deskew it.

    Runs edge detection, looks for the largest 4-point contour, and applies a
    perspective warp so the document fills the frame. If no 4-point contour is
    found among the five largest contours, the image is returned unchanged.

    Args:
        image: PIL image (RGB) of a photographed/scanned document.

    Returns:
        A PIL image — either the perspective-corrected document or the
        original image when no document outline could be detected.
    """
    gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
    # Bilateral filter smooths noise while keeping edges sharp for Canny.
    gray = cv2.bilateralFilter(gray, 11, 17, 17)
    edges = cv2.Canny(gray, 30, 200)

    contours, _ = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

    doc_contour = None
    for contour in contours:
        perimeter = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
        if len(approx) == 4:  # first 4-corner polygon = assumed document edge
            doc_contour = approx
            break

    if doc_contour is None:
        # No document-like quadrilateral found; fall back to the raw image.
        return image

    # Order the 4 corners as top-left, top-right, bottom-right, bottom-left.
    pts = doc_contour.reshape(4, 2)
    rect = np.zeros((4, 2), dtype="float32")
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]      # top-left has the smallest x + y
    rect[2] = pts[np.argmax(s)]      # bottom-right has the largest x + y
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]   # top-right has the smallest y - x
    rect[3] = pts[np.argmax(diff)]   # bottom-left has the largest y - x

    (tl, tr, br, bl) = rect
    widthA = np.linalg.norm(br - bl)
    widthB = np.linalg.norm(tr - tl)
    heightA = np.linalg.norm(tr - br)
    heightB = np.linalg.norm(tl - bl)
    maxWidth = int(max(widthA, widthB))
    maxHeight = int(max(heightA, heightB))

    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]
    ], dtype="float32")

    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(np.array(image), M, (maxWidth, maxHeight))
    return Image.fromarray(warped)


# ---------- Step 2: OCR & Word export ----------
def extract_text(image):
    """OCR a document image and export the text as a .docx file.

    Args:
        image: PIL image of the document to OCR.

    Returns:
        Tuple of (extracted text, path to a generated .docx file).
    """
    preprocessed = preprocess_image(image)
    text = pytesseract.image_to_string(preprocessed, lang="eng+ara")

    # Save as DOCX — one paragraph per non-blank line.
    doc = Document()
    for line in text.splitlines():
        if line.strip():
            doc.add_paragraph(line)

    # BUG FIX: gr.File expects a filesystem path, not a (BytesIO, name)
    # tuple — write the document to a named temp file and return its path.
    tmp = tempfile.NamedTemporaryFile(suffix=".docx", delete=False)
    doc.save(tmp.name)
    tmp.close()
    return text, tmp.name


# ---------- Step 3: Gradio UI ----------
def process_image(image):
    """Gradio callback: OCR the uploaded image, return (text, download file)."""
    return extract_text(image)


with gr.Blocks(title="AI Document OCR (Light Version)") as demo:
    gr.Markdown(
        "## 📄 AI Document OCR (Light Version)\n"
        "Upload a scanned or skewed document, and the model will correct it "
        "and extract formatted text."
    )

    # Layout: upload row, results row, action button row.
    with gr.Row():
        input_image = gr.Image(type="pil", label="Upload Document")
    with gr.Row():
        output_text = gr.Textbox(label="Extracted Text", lines=15)
        output_file = gr.File(label="Download as .docx")
    with gr.Row():
        submit_btn = gr.Button("Extract Text")

    submit_btn.click(
        process_image,
        inputs=input_image,
        outputs=[output_text, output_file],
    )

demo.launch()