Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import cv2 | |
| import numpy as np | |
| import pytesseract | |
| #pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract" | |
| pytesseract.pytesseract.tesseract_cmd = "tesseract" | |
| from PIL import Image | |
| from docx import Document | |
| import io | |
| # ---------- Step 1: Image preprocessing ---------- | |
def _order_corners(pts):
    """Return the four (x, y) points in *pts* ordered TL, TR, BR, BL.

    In image coordinates (y grows downwards) the top-left corner has the
    smallest ``x + y`` and the bottom-right the largest, while the top-right
    has the smallest ``y - x`` and the bottom-left the largest.
    """
    rect = np.zeros((4, 2), dtype="float32")
    coord_sum = pts.sum(axis=1)
    rect[0] = pts[np.argmin(coord_sum)]
    rect[2] = pts[np.argmax(coord_sum)]
    # np.diff along axis 1 of [x, y] rows yields y - x.
    coord_diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(coord_diff)]
    rect[3] = pts[np.argmax(coord_diff)]
    return rect


def preprocess_image(image):
    """Detect a four-cornered document outline and flatten it.

    The image is converted to grayscale, smoothed and edge-detected; the five
    largest contours are then examined and the first one that simplifies to a
    quadrilateral is warped to an axis-aligned rectangle.

    Args:
        image: A PIL image (any mode) or an RGB array-like accepted by
            ``np.array``.

    Returns:
        A PIL image containing the perspective-corrected region, or *image*
        itself, unchanged, when no usable quadrilateral is found.
    """
    # cvtColor(RGB2GRAY) needs exactly three channels, so normalise PIL
    # images in other modes (RGBA, L, P, ...) instead of crashing on them.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        rgb = np.array(image.convert("RGB"))
    else:
        rgb = np.array(image)

    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    gray = cv2.bilateralFilter(gray, 11, 17, 17)
    edges = cv2.Canny(gray, 30, 200)

    # findContours returns (contours, hierarchy) on OpenCV 4 and
    # (image, contours, hierarchy) on OpenCV 3; [-2] is the contours in both.
    contours = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    candidates = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

    doc_contour = None
    for contour in candidates:
        perimeter = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
        if len(approx) == 4:
            doc_contour = approx
            break

    if doc_contour is None:
        return image

    rect = _order_corners(doc_contour.reshape(4, 2))
    tl, tr, br, bl = rect

    # Use the longer of each pair of opposite edges as the output size.
    max_width = int(max(np.linalg.norm(br - bl), np.linalg.norm(tr - tl)))
    max_height = int(max(np.linalg.norm(tr - br), np.linalg.norm(tl - bl)))

    # A degenerate quadrilateral would give warpPerspective an empty or
    # one-pixel target; fall back to the untouched input instead.
    if max_width < 2 or max_height < 2:
        return image

    dst = np.array(
        [
            [0, 0],
            [max_width - 1, 0],
            [max_width - 1, max_height - 1],
            [0, max_height - 1],
        ],
        dtype="float32",
    )
    transform = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(rgb, transform, (max_width, max_height))
    return Image.fromarray(warped)
| # ---------- Step 2: OCR & Word export ---------- | |
def extract_text(image, lang="eng+ara"):
    """Run OCR on *image* and package the recognised text as a .docx file.

    The image is first passed through ``preprocess_image``; Tesseract is then
    run on the result and every non-blank output line becomes one paragraph
    of a new Word document.

    Args:
        image: The input image, forwarded to ``preprocess_image``.
        lang: Tesseract language specification passed to
            ``pytesseract.image_to_string``. Defaults to ``"eng+ara"``.

    Returns:
        A tuple ``(text, (buffer, "document_output.docx"))`` where *text* is
        the raw OCR output and *buffer* is an ``io.BytesIO`` positioned at the
        start of the serialised document.
    """
    preprocessed = preprocess_image(image)
    text = pytesseract.image_to_string(preprocessed, lang=lang)

    # Save as DOCX, skipping whitespace-only lines.
    doc = Document()
    for line in text.splitlines():
        if line.strip():
            doc.add_paragraph(line)

    buffer = io.BytesIO()
    doc.save(buffer)
    buffer.seek(0)  # rewind so the caller can read from the beginning
    return text, (buffer, "document_output.docx")
| # ---------- Step 3: Gradio UI ---------- | |
def process_image(image):
    """Gradio click handler: OCR the uploaded image and offer a .docx download.

    Args:
        image: The uploaded image, or ``None`` when the button is pressed
            before anything has been uploaded.

    Returns:
        A tuple ``(text, path)`` for the text box and the file output. *path*
        is the location of the generated .docx on disk, or ``None`` when no
        image was supplied.
    """
    import os
    import tempfile

    # Nothing uploaded: clear both outputs instead of failing inside OpenCV.
    if image is None:
        return "", None

    text, (buffer, filename) = extract_text(image)

    # The file output needs a path on disk, not an in-memory buffer. A fresh
    # directory per request keeps the download name stable without clashes.
    out_dir = tempfile.mkdtemp(prefix="ocr_docx_")
    out_path = os.path.join(out_dir, filename)
    with open(out_path, "wb") as fh:
        fh.write(buffer.getvalue())
    return text, out_path
# Build the UI: an image upload, the extracted text plus a .docx download,
# and a button that runs process_image on the uploaded image.
with gr.Blocks(title="AI Document OCR (Light Version)") as demo:
    # NOTE(review): the heading emoji was mis-encoded in the source; a
    # document emoji is assumed here -- confirm against the original.
    gr.Markdown(
        "## 📄 AI Document OCR (Light Version)\n"
        "Upload a scanned or skewed document, and the model will correct it "
        "and extract formatted text."
    )
    with gr.Row():
        input_image = gr.Image(type="pil", label="Upload Document")
    with gr.Row():
        output_text = gr.Textbox(label="Extracted Text", lines=15)
        output_file = gr.File(label="Download as .docx")
    with gr.Row():
        submit_btn = gr.Button("Extract Text")
    submit_btn.click(
        process_image,
        inputs=input_image,
        outputs=[output_text, output_file],
    )

# Only start the server when run as a script, so importing this module
# (for tests or tooling) does not launch the app.
if __name__ == "__main__":
    demo.launch()