# app.py — PaddleOCR text-extraction Gradio app (Hugging Face Space "Paddleocr")
import os
import io
import sys
import json
import traceback
from typing import List, Tuple
import numpy as np
from PIL import Image
import fitz # PyMuPDF
import cv2
import gradio as gr
from paddleocr import PaddleOCR
# --------- Config knobs (safe defaults) ----------
# All knobs are read from environment variables so the Space can be tuned
# without code changes.
LANG = os.getenv("OCR_LANG", "en") # e.g., "en", "ar", "en_number", "en_PP-OCRv3"
USE_GPU = os.getenv("OCR_USE_GPU", "false").lower() == "true"
# NOTE(review): DET and REC are read here but never passed to PaddleOCR below
# (det_model_dir/rec_model_dir are hard-coded to None) — presumably leftover
# config; confirm whether they should be wired in or removed.
DET = os.getenv("OCR_DET_MODEL", "ch_PP-OCRv4_det")
REC = os.getenv("OCR_REC_MODEL", "en_PP-OCRv4")
CLS = True # angle classification
CONF_THRESHOLD = float(os.getenv("OCR_CONF_THRESHOLD", "0.0")) # 0.0 → keep everything
# Initialize once (download models once, reuse across requests)
# Tip: If you want Arabic/English mixed, set LANG="ar" or "en" variants per PaddleOCR docs
# Module-level singleton: model download/load happens once at import time and
# is shared by every Gradio request.
OCR = PaddleOCR(
    use_angle_cls=CLS,
    lang=LANG,
    use_gpu=USE_GPU,
    det_model_dir=None, # use default
    rec_model_dir=None, # use default
    show_log=False
)
def _pil_to_cv(img: Image.Image) -> np.ndarray:
    """Convert a PIL image (RGB channel order) to an OpenCV BGR ndarray."""
    rgb = np.array(img)
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
def ocr_image(pil_img: Image.Image) -> List[Tuple[str, float]]:
    """
    Run OCR on a PIL image and return a list of (text, confidence) pairs.

    Only lines whose confidence is >= CONF_THRESHOLD are kept (the default
    threshold of 0.0 keeps everything).
    """
    img_cv = _pil_to_cv(pil_img)
    result = OCR.ocr(img_cv, cls=CLS)
    lines: List[Tuple[str, float]] = []
    # PaddleOCR returns one entry per input image; when no text is detected
    # that entry is None rather than an empty list, so guard both cases —
    # otherwise `for line in result[0]` raises TypeError on blank pages.
    if not result or result[0] is None:
        return lines
    # Each item of result[0] is [box, (text, confidence)].
    for line in result[0]:
        txt = line[1][0]
        conf = float(line[1][1])
        if conf >= CONF_THRESHOLD:
            lines.append((txt, conf))
    return lines
def read_image(filepath: str) -> Image.Image:
    """
    Load an image file via PIL and normalize it to RGB mode
    (handles PNG, JPG, TIFF, etc.).
    """
    with Image.open(filepath) as src:
        rgb = src.convert("RGB")
    return rgb
def read_pdf_pages(filepath: str) -> List[Image.Image]:
    """
    Render each page of a PDF to a PIL RGB image using PyMuPDF.

    Pages are rasterized at 2x scale, which improves OCR accuracy on
    small glyphs.
    """
    # The scale matrix is loop-invariant — build it once, not per page.
    mat = fitz.Matrix(2, 2)  # 2x upscaling
    pages: List[Image.Image] = []
    with fitz.open(filepath) as doc:
        for page in doc:
            pix = page.get_pixmap(matrix=mat, alpha=False)
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            pages.append(img)
    return pages
def extract_text_from_file(filepath: str) -> str:
    """
    Dispatch on file extension and return the extracted plain text.

    PDFs are OCR'd page by page with a "--- Page N ---" header per page;
    common raster image formats are OCR'd directly.

    Raises:
        ValueError: if the extension is neither a PDF nor a supported image.
    """
    name = filepath.lower()
    if name.endswith(".pdf"):
        chunks: List[str] = []
        for page_no, page_img in enumerate(read_pdf_pages(filepath), start=1):
            body = "\n".join(text for text, _ in ocr_image(page_img))
            # Page header keeps multi-page output readable.
            chunks.append(f"--- Page {page_no} ---\n{body}".strip())
        return "\n\n".join(chunk for chunk in chunks if chunk)
    if name.endswith((".png", ".jpg", ".jpeg", ".tif", ".tiff", ".bmp", ".webp")):
        page_img = read_image(filepath)
        return "\n".join(text for text, _ in ocr_image(page_img)).strip()
    raise ValueError("Unsupported file type. Please upload an image (PNG/JPG/TIFF/WEBP/BMP) or a PDF.")
def infer(file_obj) -> str:
    """
    Gradio entry point: OCR the uploaded file and return its text.

    Returns a human-readable message (never raises) so the UI always
    shows something; the raw text is also printed to the server console.
    """
    if file_obj is None:
        return "No file uploaded."
    try:
        # Gradio file components expose a .name path; fall back to str().
        if hasattr(file_obj, "name"):
            filepath = file_obj.name
        else:
            filepath = str(file_obj)
        text = extract_text_from_file(filepath)
        # 🔊 Console telemetry: dump raw text to terminal
        print("\n================ OCR RAW TEXT ================\n")
        print(text)
        print("\n==================== END =====================\n", flush=True)
        return text or "[No text detected]"
    except Exception as e:
        traceback.print_exc()
        return f"Error during OCR: {e}"
# ------------- Gradio UI ----------------
TITLE = "PaddleOCR Text Extractor (Images & PDFs)"
DESC = (
    "Upload an image or PDF. The app runs PaddleOCR (PP-OCRv4 pipeline) and returns plain text. "
    "Set `OCR_LANG`, `OCR_USE_GPU`, and `OCR_CONF_THRESHOLD` as env vars to tune."
)
# Build the Blocks layout once at import time; `demo` is the app Gradio serves.
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"# {TITLE}\n{DESC}")
    with gr.Row():
        # Accept a single file: any image type, or a .pdf.
        file_in = gr.File(label="Upload Image or PDF", file_count="single", file_types=["image", ".pdf"])
        out = gr.Textbox(label="Extracted Text", lines=25, show_copy_button=True)
    run_btn = gr.Button("Run OCR", variant="primary")
    # Explicit button click runs OCR on the uploaded file.
    run_btn.click(fn=infer, inputs=[file_in], outputs=[out])
    # Also trigger on file change for convenience
    file_in.change(fn=infer, inputs=[file_in], outputs=[out])
if __name__ == "__main__":
    # Tip: Set server_name="0.0.0.0" for containers; share=True for quick external testing
    # Port 7860 is the Hugging Face Spaces default.
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)