Spaces:

KarthiEz
/

Paddleocr

Running

App Files Files Community

KarthiEz committed on Oct 27

Commit

11c7f99

verified ·

1 Parent(s): 769fd42

Create app.py

Browse files

Files changed (1) hide show

app.py +132 -0

app.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import os
+import io
+import sys
+import json
+import traceback
+from typing import List, Tuple
+import numpy as np
+from PIL import Image
+import fitz  # PyMuPDF
+import cv2
+import gradio as gr
+from paddleocr import PaddleOCR
+# --------- Config knobs (safe defaults) ----------
+# Every knob below is read once, at import time, from environment variables.
+# LANG is handed to PaddleOCR through the `lang=` argument further down.
+# NOTE(review): "en_number" and "en_PP-OCRv3" in the examples look like model names rather
+# than language codes — confirm against the PaddleOCR docs before relying on them.
+LANG = os.getenv("OCR_LANG", "en")          # e.g., "en", "ar", "en_number", "en_PP-OCRv3"
+# Only the literal string "true" (any letter case) enables the GPU; every other value is False.
+USE_GPU = os.getenv("OCR_USE_GPU", "false").lower() == "true"
+# NOTE(review): DET and REC are read here but not referenced anywhere else in the visible
+# code — the PaddleOCR(...) call below passes det_model_dir=None / rec_model_dir=None.
+DET = os.getenv("OCR_DET_MODEL", "ch_PP-OCRv4_det")
+REC = os.getenv("OCR_REC_MODEL", "en_PP-OCRv4")
+CLS = True                                  # angle classification; used for both use_angle_cls= and OCR.ocr(cls=...)
+# float() raises ValueError at import time if OCR_CONF_THRESHOLD is not a valid number.
+CONF_THRESHOLD = float(os.getenv("OCR_CONF_THRESHOLD", "0.0"))  # 0.0 → keep everything
+# Initialize once (download models once, reuse across requests)
+# Tip: If you want Arabic/English mixed, set LANG="ar" or "en" variants per PaddleOCR docs
+# The single module-level OCR instance is shared by every call to ocr_image().
+OCR = PaddleOCR(
+    use_angle_cls=CLS,
+    lang=LANG,
+    use_gpu=USE_GPU,
+    det_model_dir=None,   # use default
+    rec_model_dir=None,   # use default
+    show_log=False
+)
+def _pil_to_cv(img: Image.Image) -> np.ndarray:
+    """PIL RGB -> OpenCV BGR ndarray"""
+    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+def ocr_image(pil_img: Image.Image) -> List[Tuple[str, float]]:
+    """
+    Run OCR on a PIL image and return list of (text, confidence).
+    """
+    img_cv = _pil_to_cv(pil_img)
+    result = OCR.ocr(img_cv, cls=CLS)
+    lines: List[Tuple[str, float]] = []
+    if not result:
+        return lines
+    # PaddleOCR returns a list per image; each item has [ [box, (text, conf)], ... ]
+    for line in result[0]:
+        txt = line[1][0]
+        conf = float(line[1][1])
+        if conf >= CONF_THRESHOLD:
+            lines.append((txt, conf))
+    return lines
+def read_image(filepath: str) -> Image.Image:
+    """
+    Open an image robustly via PIL (also handles TIFF, JPG, PNG).
+    """
+    with Image.open(filepath) as im:
+        return im.convert("RGB")
+def read_pdf_pages(filepath: str) -> List[Image.Image]:
+    """
+    Render each PDF page to a PIL image (RGB) using PyMuPDF.
+    """
+    pages: List[Image.Image] = []
+    with fitz.open(filepath) as doc:
+        for page in doc:
+            # Render with a scale factor for better OCR accuracy
+            mat = fitz.Matrix(2, 2)  # 2x upscaling
+            pix = page.get_pixmap(matrix=mat, alpha=False)
+            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+            pages.append(img)
+    return pages
+def extract_text_from_file(filepath: str) -> str:
+    """
+    Dispatch by file type; return plain text.
+    """
+    lower = filepath.lower()
+    if lower.endswith(".pdf"):
+        pages = read_pdf_pages(filepath)
+        all_text: List[str] = []
+        for i, pil_img in enumerate(pages, start=1):
+            lines = ocr_image(pil_img)
+            page_text = "\n".join([t for t, _ in lines])
+            # Add a page header for clarity on multi-page docs
+            all_text.append(f"--- Page {i} ---\n{page_text}".strip())
+        return "\n\n".join([s for s in all_text if s])
+    elif lower.endswith((".png", ".jpg", ".jpeg", ".tif", ".tiff", ".bmp", ".webp")):
+        img = read_image(filepath)
+        lines = ocr_image(img)
+        return "\n".join([t for t, _ in lines]).strip()
+    else:
+        raise ValueError("Unsupported file type. Please upload an image (PNG/JPG/TIFF/WEBP/BMP) or a PDF.")
+def infer(file_obj) -> str:
+    try:
+        if file_obj is None:
+            return "No file uploaded."
+        filepath = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
+        text = extract_text_from_file(filepath)
+        # 🔊 Console telemetry: dump raw text to terminal
+        print("\n================ OCR RAW TEXT ================\n")
+        print(text)
+        print("\n==================== END =====================\n", flush=True)
+        return text or "[No text detected]"
+    except Exception as e:
+        traceback.print_exc()
+        return f"Error during OCR: {e}"
+# ------------- Gradio UI ----------------
+# TITLE is used both as the page title and as the Markdown heading below.
+TITLE = "PaddleOCR Text Extractor (Images & PDFs)"
+# NOTE(review): DESC advertises the PP-OCRv4 pipeline, but the OCR instance is built with
+# default model dirs — confirm which models are actually loaded.
+DESC = (
+    "Upload an image or PDF. The app runs PaddleOCR (PP-OCRv4 pipeline) and returns plain text. "
+    "Set `OCR_LANG`, `OCR_USE_GPU`, and `OCR_CONF_THRESHOLD` as env vars to tune."
+)
+# Page layout, top to bottom: heading + description, file upload, output textbox, run button.
+with gr.Blocks(title=TITLE) as demo:
+    gr.Markdown(f"# {TITLE}\n{DESC}")
+    with gr.Row():
+        # Single-file upload restricted to images and .pdf files.
+        file_in = gr.File(label="Upload Image or PDF", file_count="single", file_types=["image", ".pdf"])
+    out = gr.Textbox(label="Extracted Text", lines=25, show_copy_button=True)
+    run_btn = gr.Button("Run OCR", variant="primary")
+    # Both events below call infer with the uploaded file and write its return value to `out`.
+    run_btn.click(fn=infer, inputs=[file_in], outputs=[out])
+    # Also trigger on file change for convenience
+    # NOTE(review): uploading a file and then pressing the button runs infer on the same
+    # file twice — confirm the duplicate OCR pass is acceptable.
+    file_in.change(fn=infer, inputs=[file_in], outputs=[out])
+# Start the web server only when this file is executed as a script.
+if __name__ == "__main__":
+    # Launches with server_name="0.0.0.0" on port 7860 and show_error=True.
+    # Tip: add share=True for quick external testing.
+    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)