Spaces:
Sleeping
Sleeping
import os
import warnings

# Keep terminal output readable: PaddleOCR emits deprecation warnings that
# are harmless for this app, so they are filtered out globally.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

# Runtime switches, exported before the Paddle-based imports further down so
# they are already in place when those libraries load (presumably they are
# read at import/initialisation time — confirm against the Paddle docs).
os.environ.update(
    {
        # Disable the buggy PIR engine.
        "FLAGS_enable_pir_api": "0",
        # Disable Intel MKLDNN operations.
        "FLAGS_use_mkldnn": "0",
        # Limit OpenMP to a single thread.
        "OMP_NUM_THREADS": "1",
        # Presumably skips the model-source check — verify against PaddleX.
        "PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK": "True",
    }
)
| import gradio as gr | |
| from paddleocr import PaddleOCR | |
| import fitz # PyMuPDF | |
| from PIL import Image, ImageDraw | |
| import numpy as np | |
| import cv2 | |
# Module-level OCR engine, constructed once at import time and shared by every
# request: English language, with text-line orientation handling switched on.
ocr = PaddleOCR(use_textline_orientation=True, lang="en")
def _unpack_ocr_result(result):
    """Split the first page of an OCR result into ``(polygons, texts)``.

    Two result layouts are understood:

    * dict-like pages, as returned by PaddleOCR 3.x pipelines, where the
      polygons live under ``"rec_polys"`` (falling back to ``"dt_polys"``)
      and the recognised strings under ``"rec_texts"``;
    * the legacy layout, a list of ``[polygon, (text, score)]`` entries.

    Args:
        result: Whatever ``ocr.ocr(...)`` returned; may be ``None``, empty,
            or contain ``None`` as its first element when nothing was found.

    Returns:
        A ``(polygons, texts)`` tuple of lists; both are empty when there is
        nothing to report.
    """
    if result is None or len(result) == 0:
        return [], []

    page = result[0]
    if page is None:
        return [], []

    if isinstance(page, dict):
        # Explicit ``is None`` checks: the values may be numpy arrays, whose
        # truthiness is ambiguous.
        polygons = page.get("rec_polys")
        if polygons is None:
            polygons = page.get("dt_polys")
        texts = page.get("rec_texts")
        return (
            [] if polygons is None else list(polygons),
            [] if texts is None else list(texts),
        )

    # Legacy layout: one [polygon, (text, score)] entry per detected line.
    polygons = []
    texts = []
    for line in page:
        polygons.append(line[0])
        texts.append(line[1][0])
    return polygons, texts


def draw_boxes(image_pil, result):
    """Draw a red outline around every text region found by the OCR engine.

    Args:
        image_pil: PIL image to annotate; it is modified in place.
        result: Raw return value of ``ocr.ocr(...)`` in either supported
            layout (see ``_unpack_ocr_result``).

    Returns:
        The same ``image_pil`` object, annotated when regions were found.
    """
    polygons, _ = _unpack_ocr_result(result)
    if not polygons:
        return image_pil

    draw = ImageDraw.Draw(image_pil)
    for polygon in polygons:
        # Convert to plain floats so numpy scalars never reach Pillow.
        points = [(float(point[0]), float(point[1])) for point in polygon]
        draw.polygon(points, outline="red", width=2)
    return image_pil


def _ocr_image(img):
    """Run OCR on one RGB PIL image.

    Returns:
        ``(annotated_copy, texts)`` where ``annotated_copy`` is a copy of
        ``img`` with the detected regions outlined and ``texts`` is the list
        of recognised strings in result order.
    """
    # The engine is fed a BGR array, matching the OpenCV channel order.
    img_np = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    result = ocr.ocr(img_np)
    _, texts = _unpack_ocr_result(result)
    return draw_boxes(img.copy(), result), texts


def extract_text(input_file):
    """Run OCR over an uploaded image or PDF.

    Args:
        input_file: The uploaded file, either an object exposing the path as
            ``.name`` or a plain path string; ``None`` when nothing was
            uploaded.

    Returns:
        A ``(text, images)`` tuple: the recognised text (with a
        ``--- Page N ---`` header per page for PDFs) and the list of
        annotated PIL images. On failure the text is an error message and
        the image list is empty.
    """
    if input_file is None:
        return "Please upload a file.", []

    # Accept both upload objects (path in ``.name``) and plain path strings.
    file_path = getattr(input_file, "name", input_file)
    output_images = []

    try:
        if str(file_path).lower().endswith(".pdf"):
            sections = []
            # The context manager closes the document even if a page fails.
            with fitz.open(file_path) as doc:
                for page_number, page in enumerate(doc, start=1):
                    pix = page.get_pixmap(dpi=200)
                    img = Image.frombytes(
                        "RGB", [pix.width, pix.height], pix.samples
                    )
                    annotated, texts = _ocr_image(img)
                    output_images.append(annotated)
                    body = "\n".join(texts) if texts else "No text found."
                    sections.append(f"--- Page {page_number} ---\n{body}\n\n")
            full_text = "".join(sections)
        else:
            # Process as a single image; release the file handle right away.
            with Image.open(file_path) as source:
                img = source.convert("RGB")
            annotated, texts = _ocr_image(img)
            output_images.append(annotated)
            full_text = "\n".join(texts) if texts else "No text detected."
    except Exception as e:
        # UI boundary: report the failure in the text box instead of
        # crashing the request.
        return f"Error during OCR: {str(e)}", []

    return full_text, output_images
# Gradio interface: upload control and button in one column, the extracted
# text beside it, and a gallery of annotated images in a row underneath.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 PaddleOCR: Image & PDF Text Extraction")

    with gr.Row():
        with gr.Column():
            file_input = gr.File(
                label="Upload Image or PDF",
                file_types=[".pdf", ".jpg", ".png", ".jpeg"],
            )
            submit_btn = gr.Button("Extract Text", variant="primary")
        with gr.Column():
            text_output = gr.Textbox(label="Extracted Text", lines=15)

    with gr.Row():
        image_output = gr.Gallery(label="Detected Regions", columns=2)

    # Clicking the button runs OCR on the upload and fills both outputs.
    submit_btn.click(extract_text, file_input, [text_output, image_output])


if __name__ == "__main__":
    # NOTE(review): passing ``theme`` to launch() assumes a Gradio release
    # whose launch() accepts it — confirm against the pinned version.
    demo.launch(theme=gr.themes.Soft())