# WBWORKSPACE / app.py
# WahabNoman's picture
# Update app.py
# 5989aca verified
import os
import warnings
# Silence every DeprecationWarning process-wide (the filter is by category, so it
# is not limited to PaddleOCR) to keep the terminal output clean.
warnings.filterwarnings("ignore", category=DeprecationWarning)
# Runtime flags are exported BEFORE paddleocr is imported further down.
# NOTE(review): presumably the Paddle stack reads them at import time, so keep
# these assignments above the imports — confirm before reordering.
# Disable the buggy PIR engine and Intel MKLDNN operations
os.environ["FLAGS_enable_pir_api"] = "0"
os.environ["FLAGS_use_mkldnn"] = "0"
# Ask OpenMP-based libraries to use a single thread.
os.environ["OMP_NUM_THREADS"] = "1"
# NOTE(review): by its name this skips a model-source check in the Paddle
# tooling at startup — confirm against the installed version.
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
import gradio as gr
from paddleocr import PaddleOCR
import fitz # PyMuPDF
from PIL import Image, ImageDraw
import numpy as np
import cv2
# Initialize OCR Engine
# A single module-level engine is built once at import time and shared by every
# extract_text call. It is configured for English; use_textline_orientation=True
# presumably enables per-line orientation handling (TODO confirm).
ocr = PaddleOCR(lang='en', use_textline_orientation=True)
def draw_boxes(image_pil, result):
    """Draw red outlines around every text region reported by OCR.

    Args:
        image_pil: PIL image to annotate. It is drawn on in place.
        result: Value returned by ``ocr.ocr(...)``. Only the first element is
            inspected, and two layouts are understood:

            * legacy list layout, ``[[box, (text, score)], ...]``;
            * dict-like page result whose polygons live under ``"rec_polys"``
              (falling back to ``"dt_polys"``).

    Returns:
        The same ``image_pil`` object, annotated when any region was found.
    """
    if not result or result[0] is None:
        return image_pil

    page = result[0]
    if isinstance(page, dict):
        # Dict-like page result: iterating it would yield key names, not
        # detections, so the polygons have to be looked up by key.
        polygons = page.get("rec_polys")
        if polygons is None:
            polygons = page.get("dt_polys")
        if polygons is None:
            polygons = []
    else:
        polygons = [entry[0] for entry in page]

    # Plain float tuples keep the drawing call independent of whether the
    # coordinates arrived as Python numbers or as numpy scalars.
    outlines = [
        [(float(point[0]), float(point[1])) for point in polygon]
        for polygon in polygons
    ]
    if not outlines:
        # Nothing detected: leave the image untouched.
        return image_pil

    draw = ImageDraw.Draw(image_pil)
    for outline in outlines:
        draw.polygon(outline, outline="red", width=2)
    return image_pil
def _recognized_lines(result):
    """Return the recognized text lines held in an ``ocr.ocr(...)`` result."""
    if not result or not result[0]:
        return []
    page = result[0]
    if isinstance(page, dict):
        # Dict-like page result: the texts are stored under "rec_texts".
        texts = page.get("rec_texts")
        return [] if texts is None else [str(text) for text in texts]
    # Legacy list layout: every entry is [box, (text, score)].
    return [entry[1][0] for entry in page]


def _ocr_image(img):
    """Run OCR on one RGB PIL image; return (text lines, annotated copy)."""
    img_np = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    result = ocr.ocr(img_np)
    # Draw on a copy so the caller's image stays clean.
    return _recognized_lines(result), draw_boxes(img.copy(), result)


def extract_text(input_file):
    """Extract text from an uploaded image or PDF and outline detected regions.

    Args:
        input_file: Uploaded file. Either an object exposing the path through
            a ``name`` attribute or the path itself; ``None`` means nothing
            was uploaded.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the recognized text (PDF pages
        are separated by ``--- Page N ---`` headers) or a status/error message.
        ``images`` is a list of annotated PIL images, one per page, and is
        empty when nothing was uploaded or an error occurred.
    """
    if input_file is None:
        return "Please upload a file.", []

    # Accept both file-like wrappers (with .name) and plain path strings.
    file_path = getattr(input_file, "name", input_file)
    output_images = []
    try:
        if str(file_path).lower().endswith('.pdf'):
            sections = []
            # The context manager closes the document even if a page fails.
            with fitz.open(file_path) as doc:
                for page_num, page in enumerate(doc, start=1):
                    pix = page.get_pixmap(dpi=200)
                    img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
                    lines, annotated = _ocr_image(img)
                    output_images.append(annotated)
                    body = "\n".join(lines) if lines else "No text found."
                    sections.append(f"--- Page {page_num} ---\n{body}\n\n")
            full_text = "".join(sections)
        else:
            # Process as Image; convert() returns an independent image, so the
            # source file handle can be released right away.
            with Image.open(file_path) as source:
                img = source.convert("RGB")
            lines, annotated = _ocr_image(img)
            output_images.append(annotated)
            full_text = "\n".join(lines) if lines else "No text detected."
    except Exception as e:
        # UI boundary: report the failure in the text box instead of crashing.
        return f"Error during OCR: {str(e)}", []
    return full_text, output_images
# Gradio front end: an upload/submit column next to the extracted text, with a
# gallery of annotated pages underneath.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 PaddleOCR: Image & PDF Text Extraction")

    with gr.Row():
        # Left: file picker and trigger button.
        with gr.Column():
            file_input = gr.File(
                label="Upload Image or PDF",
                file_types=[".pdf", ".jpg", ".png", ".jpeg"],
            )
            submit_btn = gr.Button("Extract Text", variant="primary")
        # Right: recognized text.
        with gr.Column():
            text_output = gr.Textbox(label="Extracted Text", lines=15)

    with gr.Row():
        image_output = gr.Gallery(label="Detected Regions", columns=2)

    # extract_text returns (text, images), matching the two outputs in order.
    submit_btn.click(
        extract_text,
        inputs=file_input,
        outputs=[text_output, image_output],
    )

if __name__ == "__main__":
    soft_theme = gr.themes.Soft()
    demo.launch(theme=soft_theme)