# app.py — PaddleOCR text-extraction Gradio app (Hugging Face Space "Paddleocr")
import os
import io
import sys
import json
import traceback
from typing import List, Tuple
import numpy as np
from PIL import Image
import fitz # PyMuPDF
import cv2
import gradio as gr
from paddleocr import PaddleOCR
# --------- Config knobs (safe defaults) ----------
# All knobs are read from environment variables so the Space can be tuned
# without code changes.
LANG = os.getenv("OCR_LANG", "en") # e.g., "en", "ar", "en_number", "en_PP-OCRv3"
USE_GPU = os.getenv("OCR_USE_GPU", "false").lower() == "true"
# NOTE(review): DET and REC are read here but never passed to PaddleOCR below
# (det_model_dir/rec_model_dir are hard-coded to None) — presumably leftover
# config; confirm whether they should be wired in or removed.
DET = os.getenv("OCR_DET_MODEL", "ch_PP-OCRv4_det")
REC = os.getenv("OCR_REC_MODEL", "en_PP-OCRv4")
CLS = True # angle classification
CONF_THRESHOLD = float(os.getenv("OCR_CONF_THRESHOLD", "0.0")) # 0.0 → keep everything
# Initialize once (download models once, reuse across requests)
# Tip: If you want Arabic/English mixed, set LANG="ar" or "en" variants per PaddleOCR docs
# Module-level singleton: model download/load happens once at import time and
# is shared by every Gradio request.
OCR = PaddleOCR(
    use_angle_cls=CLS,
    lang=LANG,
    use_gpu=USE_GPU,
    det_model_dir=None, # use default
    rec_model_dir=None, # use default
    show_log=False
)
def _pil_to_cv(img: Image.Image) -> np.ndarray:
    """Convert a PIL image (RGB channel order) to an OpenCV BGR ndarray."""
    rgb = np.array(img)
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
def ocr_image(pil_img: Image.Image) -> List[Tuple[str, float]]:
    """
    Run OCR on a PIL image and return a list of (text, confidence) pairs.

    Only lines whose confidence is >= CONF_THRESHOLD are kept (the default
    threshold of 0.0 keeps everything).
    """
    img_cv = _pil_to_cv(pil_img)
    result = OCR.ocr(img_cv, cls=CLS)
    lines: List[Tuple[str, float]] = []
    # PaddleOCR returns one entry per input image; when no text is detected
    # that entry is None rather than an empty list, so guard both cases —
    # otherwise `for line in result[0]` raises TypeError on blank pages.
    if not result or result[0] is None:
        return lines
    # Each item of result[0] is [box, (text, confidence)].
    for line in result[0]:
        txt = line[1][0]
        conf = float(line[1][1])
        if conf >= CONF_THRESHOLD:
            lines.append((txt, conf))
    return lines
def read_image(filepath: str) -> Image.Image:
    """
    Load an image file via PIL and normalize it to RGB mode
    (handles PNG, JPG, TIFF, etc.).
    """
    with Image.open(filepath) as src:
        rgb = src.convert("RGB")
    return rgb
def read_pdf_pages(filepath: str) -> List[Image.Image]:
    """
    Render each page of a PDF to a PIL RGB image using PyMuPDF.

    Pages are rasterized at 2x scale, which improves OCR accuracy on
    small glyphs.
    """
    # The scale matrix is loop-invariant — build it once, not per page.
    mat = fitz.Matrix(2, 2)  # 2x upscaling
    pages: List[Image.Image] = []
    with fitz.open(filepath) as doc:
        for page in doc:
            pix = page.get_pixmap(matrix=mat, alpha=False)
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            pages.append(img)
    return pages
def extract_text_from_file(filepath: str) -> str:
    """
    Dispatch on file extension and return the extracted plain text.

    PDFs are OCR'd page by page with a "--- Page N ---" header per page;
    common raster image formats are OCR'd directly.

    Raises:
        ValueError: if the extension is neither a PDF nor a supported image.
    """
    name = filepath.lower()
    if name.endswith(".pdf"):
        chunks: List[str] = []
        for page_no, page_img in enumerate(read_pdf_pages(filepath), start=1):
            body = "\n".join(text for text, _ in ocr_image(page_img))
            # Page header keeps multi-page output readable.
            chunks.append(f"--- Page {page_no} ---\n{body}".strip())
        return "\n\n".join(chunk for chunk in chunks if chunk)
    if name.endswith((".png", ".jpg", ".jpeg", ".tif", ".tiff", ".bmp", ".webp")):
        page_img = read_image(filepath)
        return "\n".join(text for text, _ in ocr_image(page_img)).strip()
    raise ValueError("Unsupported file type. Please upload an image (PNG/JPG/TIFF/WEBP/BMP) or a PDF.")
def infer(file_obj) -> str:
    """
    Gradio entry point: OCR the uploaded file and return its text.

    Returns a human-readable message (never raises) so the UI always
    shows something; the raw text is also printed to the server console.
    """
    if file_obj is None:
        return "No file uploaded."
    try:
        # Gradio file components expose a .name path; fall back to str().
        if hasattr(file_obj, "name"):
            filepath = file_obj.name
        else:
            filepath = str(file_obj)
        text = extract_text_from_file(filepath)
        # 🔊 Console telemetry: dump raw text to terminal
        print("\n================ OCR RAW TEXT ================\n")
        print(text)
        print("\n==================== END =====================\n", flush=True)
        return text or "[No text detected]"
    except Exception as e:
        traceback.print_exc()
        return f"Error during OCR: {e}"
# ------------- Gradio UI ----------------
TITLE = "PaddleOCR Text Extractor (Images & PDFs)"
DESC = (
    "Upload an image or PDF. The app runs PaddleOCR (PP-OCRv4 pipeline) and returns plain text. "
    "Set `OCR_LANG`, `OCR_USE_GPU`, and `OCR_CONF_THRESHOLD` as env vars to tune."
)
# Build the Blocks layout once at import time; `demo` is the app Gradio serves.
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"# {TITLE}\n{DESC}")
    with gr.Row():
        # Accept a single file: any image type, or a .pdf.
        file_in = gr.File(label="Upload Image or PDF", file_count="single", file_types=["image", ".pdf"])
        out = gr.Textbox(label="Extracted Text", lines=25, show_copy_button=True)
    run_btn = gr.Button("Run OCR", variant="primary")
    # Explicit button click runs OCR on the uploaded file.
    run_btn.click(fn=infer, inputs=[file_in], outputs=[out])
    # Also trigger on file change for convenience
    file_in.change(fn=infer, inputs=[file_in], outputs=[out])
if __name__ == "__main__":
    # Tip: Set server_name="0.0.0.0" for containers; share=True for quick external testing
    # Port 7860 is the Hugging Face Spaces default.
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)