Spaces:

WahabNoman
/

WBWORKSPACE

Sleeping

App Files Files Community

WBWORKSPACE / app.py

WahabNoman

Update app.py

5989aca verified about 2 months ago

raw

history blame contribute delete

3.6 kB

	import os
	import warnings

	# Silence DeprecationWarning globally so PaddleOCR's harmless deprecation
	# notices do not clutter the terminal.
	warnings.filterwarnings(action="ignore", category=DeprecationWarning)

	# Runtime switches, exported before the OCR stack is imported further down:
	# turn off the buggy PIR engine and the Intel MKLDNN operations, and set
	# OMP_NUM_THREADS to "1".
	# NOTE(review): PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK presumably skips a
	# model-source check at start-up -- confirm against the PaddleX docs.
	os.environ.update(
	    {
	        "FLAGS_enable_pir_api": "0",
	        "FLAGS_use_mkldnn": "0",
	        "OMP_NUM_THREADS": "1",
	        "PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK": "True",
	    }
	)

	import gradio as gr
	from paddleocr import PaddleOCR
	import fitz # PyMuPDF
	from PIL import Image, ImageDraw
	import numpy as np
	import cv2

	# Initialize the OCR engine once, at import time; extract_text reuses this
	# single module-level instance for every page and image it processes.
	ocr = PaddleOCR(lang='en', use_textline_orientation=True)

	def _ocr_page(result):
	    """Return the first page of an OCR result, or ``None`` if it is empty."""
	    if not result or not result[0]:
	        return None
	    return result[0]


	def _ocr_boxes(result):
	    """Return the detected text polygons as lists of ``(x, y)`` float tuples.

	    Two page layouts are understood:

	    * list-style pages, where every line is ``[polygon, (text, score)]``;
	    * dict-style pages (as returned by PaddleOCR 3.x pipelines), where the
	      polygons are stored under ``"rec_polys"`` (falling back to
	      ``"dt_polys"``).
	    """
	    page = _ocr_page(result)
	    if page is None:
	        return []
	    if isinstance(page, dict):
	        polys = page.get("rec_polys")
	        if polys is None:
	            polys = page.get("dt_polys")
	        if polys is None:
	            return []
	    else:
	        polys = [line[0] for line in page]
	    # Plain floats keep the drawing call independent of the container type
	    # (nested lists or numeric arrays) the engine used for the coordinates.
	    return [[(float(pt[0]), float(pt[1])) for pt in poly] for poly in polys]


	def _ocr_texts(result):
	    """Return the recognised text lines of an OCR result, in reading order.

	    Accepts the same two page layouts as ``_ocr_boxes``; dict-style pages
	    carry their strings under ``"rec_texts"``.
	    """
	    page = _ocr_page(result)
	    if page is None:
	        return []
	    if isinstance(page, dict):
	        texts = page.get("rec_texts")
	        return [] if texts is None else list(texts)
	    return [line[1][0] for line in page]


	def draw_boxes(image_pil, result):
	    """Draw a red outline around every detected text region.

	    Args:
	        image_pil: PIL image to annotate; it is modified in place.
	        result: Value returned by ``ocr.ocr(...)`` (list-style or dict-style
	            pages, see ``_ocr_boxes``). ``None`` or an empty result is fine.

	    Returns:
	        The same ``image_pil`` object, with the outlines drawn on it.
	    """
	    boxes = _ocr_boxes(result)
	    if not boxes:
	        # Nothing detected: hand the image back untouched.
	        return image_pil
	    draw = ImageDraw.Draw(image_pil)
	    for points in boxes:
	        draw.polygon(points, outline="red", width=2)
	    return image_pil


	def _run_ocr(img):
	    """OCR one RGB PIL image and return ``(annotated_copy, text_lines)``."""
	    # The engine is fed the pixels in BGR channel order.
	    img_np = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
	    result = ocr.ocr(img_np)
	    # Annotate a copy so the caller's image stays clean.
	    return draw_boxes(img.copy(), result), _ocr_texts(result)


	def extract_text(input_file):
	    """Run OCR on an uploaded image or PDF.

	    Args:
	        input_file: The upload handed over by the Gradio file component:
	            either an object exposing the path as ``.name`` or the path
	            itself. ``None`` means nothing was uploaded.

	    Returns:
	        A ``(text, images)`` tuple. ``text`` is the recognised text (for
	        PDFs, one ``--- Page N ---`` section per page) or a user-facing
	        message; ``images`` is a list of PIL images with the detected
	        regions outlined (empty on error or when no file was given).
	    """
	    if input_file is None:
	        return "Please upload a file.", []

	    # Accept both a file wrapper with ``.name`` and a bare path string.
	    file_path = getattr(input_file, "name", input_file)
	    output_images = []

	    # Deliberately broad: this is the UI boundary, and any failure is
	    # reported to the user as text instead of crashing the request.
	    try:
	        if str(file_path).lower().endswith('.pdf'):
	            sections = []
	            # The context manager closes the document even if a page fails.
	            with fitz.open(file_path) as doc:
	                for page_num, page in enumerate(doc, start=1):
	                    pix = page.get_pixmap(dpi=200)
	                    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
	                    annotated, texts = _run_ocr(img)
	                    output_images.append(annotated)
	                    body = "\n".join(texts) if texts else "No text found."
	                    sections.append(f"--- Page {page_num} ---\n{body}\n\n")
	            full_text = "".join(sections)
	        else:
	            # Process as Image; convert() loads the pixels, so the file
	            # handle can be released right away.
	            with Image.open(file_path) as opened:
	                img = opened.convert("RGB")
	            annotated, texts = _run_ocr(img)
	            output_images.append(annotated)
	            full_text = "\n".join(texts) if texts else "No text detected."
	    except Exception as e:
	        return f"Error during OCR: {str(e)}", []

	    return full_text, output_images

	# Build the Gradio UI: a file upload and a button feed extract_text, whose
	# two return values fill the textbox and the gallery.
	# NOTE(review): indentation was lost in this copy of the file, so the exact
	# nesting of the Row/Column blocks below must be confirmed against the
	# original before editing the layout.
	with gr.Blocks() as demo:
	gr.Markdown("# 📄 PaddleOCR: Image & PDF Text Extraction")

	with gr.Row():
	with gr.Column():
	# Upload control restricted to PDF, JPG, PNG and JPEG files.
	file_input = gr.File(label="Upload Image or PDF", file_types=[".pdf", ".jpg", ".png", ".jpeg"])
	submit_btn = gr.Button("Extract Text", variant="primary")

	with gr.Column():
	text_output = gr.Textbox(label="Extracted Text", lines=15)

	with gr.Row():
	# Gallery showing the annotated images, two per row.
	image_output = gr.Gallery(label="Detected Regions", columns=2)

	# Wire the button: extract_text receives the uploaded file and its
	# (text, images) return value maps onto the outputs list in order.
	submit_btn.click(
	fn=extract_text,
	inputs=file_input,
	outputs=[text_output, image_output]
	)

	# Start the app only when this file is run as a script.
	# NOTE(review): the theme is passed to launch() here; some Gradio releases
	# expect it on gr.Blocks(...) instead -- confirm against the pinned version.
	if __name__ == "__main__":
	demo.launch(theme=gr.themes.Soft())