Spaces:

KarthiEz
/

Paddleocr

Sleeping

App Files Files Community

KarthiEz committed on Oct 28

Commit

8e08792

verified ·

1 Parent(s): 402ad7d

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -59

app.py CHANGED Viewed

@@ -1,4 +1,16 @@
 import os
 import io
 import sys
 import json
@@ -12,39 +24,50 @@ import cv2
 import gradio as gr
 from paddleocr import PaddleOCR
-# --------- Config knobs (safe defaults) ----------
-LANG = os.getenv("OCR_LANG", "en")          # e.g., "en", "ar", "en_number", "en_PP-OCRv3"
-USE_GPU = os.getenv("OCR_USE_GPU", "false").lower() == "true"
 DET = os.getenv("OCR_DET_MODEL", "ch_PP-OCRv4_det")
 REC = os.getenv("OCR_REC_MODEL", "en_PP-OCRv4")
-CLS = True                                  # angle classification
-CONF_THRESHOLD = float(os.getenv("OCR_CONF_THRESHOLD", "0.0"))  # 0.0 → keep everything
-# Initialize once (download models once, reuse across requests)
-# Tip: If you want Arabic/English mixed, set LANG="ar" or "en" variants per PaddleOCR docs
-OCR = PaddleOCR(
-    use_angle_cls=CLS,
-    lang=LANG,
-    use_gpu=USE_GPU,
-    det_model_dir=None,   # use default
-    rec_model_dir=None,   # use default
-    show_log=False
-)
 def _pil_to_cv(img: Image.Image) -> np.ndarray:
-    """PIL RGB -> OpenCV BGR ndarray"""
     return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
 def ocr_image(pil_img: Image.Image) -> List[Tuple[str, float]]:
-    """
-    Run OCR on a PIL image and return list of (text, confidence).
-    """
     img_cv = _pil_to_cv(pil_img)
-    result = OCR.ocr(img_cv, cls=CLS)
     lines: List[Tuple[str, float]] = []
     if not result:
         return lines
-    # PaddleOCR returns a list per image; each item has [ [box, (text, conf)], ... ]
     for line in result[0]:
         txt = line[1][0]
         conf = float(line[1][1])
@@ -53,46 +76,33 @@ def ocr_image(pil_img: Image.Image) -> List[Tuple[str, float]]:
     return lines
 def read_image(filepath: str) -> Image.Image:
-    """
-    Open an image robustly via PIL (also handles TIFF, JPG, PNG).
-    """
     with Image.open(filepath) as im:
         return im.convert("RGB")
-def read_pdf_pages(filepath: str) -> List[Image.Image]:
-    """
-    Render each PDF page to a PIL image (RGB) using PyMuPDF.
-    """
-    pages: List[Image.Image] = []
     with fitz.open(filepath) as doc:
         for page in doc:
-            # Render with a scale factor for better OCR accuracy
-            mat = fitz.Matrix(2, 2)  # 2x upscaling
             pix = page.get_pixmap(matrix=mat, alpha=False)
             img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
             pages.append(img)
     return pages
 def extract_text_from_file(filepath: str) -> str:
-    """
-    Dispatch by file type; return plain text.
-    """
     lower = filepath.lower()
     if lower.endswith(".pdf"):
-        pages = read_pdf_pages(filepath)
-        all_text: List[str] = []
-        for i, pil_img in enumerate(pages, start=1):
-            lines = ocr_image(pil_img)
             page_text = "\n".join([t for t, _ in lines])
-            # Add a page header for clarity on multi-page docs
-            all_text.append(f"--- Page {i} ---\n{page_text}".strip())
-        return "\n\n".join([s for s in all_text if s])
     elif lower.endswith((".png", ".jpg", ".jpeg", ".tif", ".tiff", ".bmp", ".webp")):
-        img = read_image(filepath)
-        lines = ocr_image(img)
         return "\n".join([t for t, _ in lines]).strip()
     else:
-        raise ValueError("Unsupported file type. Please upload an image (PNG/JPG/TIFF/WEBP/BMP) or a PDF.")
 def infer(file_obj) -> str:
     try:
@@ -100,21 +110,14 @@ def infer(file_obj) -> str:
             return "No file uploaded."
         filepath = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
         text = extract_text_from_file(filepath)
-        # 🔊 Console telemetry: dump raw text to terminal
-        print("\n================ OCR RAW TEXT ================\n")
-        print(text)
-        print("\n==================== END =====================\n", flush=True)
         return text or "[No text detected]"
     except Exception as e:
         traceback.print_exc()
         return f"Error during OCR: {e}"
-# ------------- Gradio UI ----------------
 TITLE = "PaddleOCR Text Extractor (Images & PDFs)"
-DESC = (
-    "Upload an image or PDF. The app runs PaddleOCR (PP-OCRv4 pipeline) and returns plain text. "
-    "Set `OCR_LANG`, `OCR_USE_GPU`, and `OCR_CONF_THRESHOLD` as env vars to tune."
-)
 with gr.Blocks(title=TITLE) as demo:
     gr.Markdown(f"# {TITLE}\n{DESC}")
@@ -122,12 +125,10 @@ with gr.Blocks(title=TITLE) as demo:
         file_in = gr.File(label="Upload Image or PDF", file_count="single", file_types=["image", ".pdf"])
     out = gr.Textbox(label="Extracted Text", lines=25, show_copy_button=True)
     run_btn = gr.Button("Run OCR", variant="primary")
     run_btn.click(fn=infer, inputs=[file_in], outputs=[out])
-    # Also trigger on file change for convenience
     file_in.change(fn=infer, inputs=[file_in], outputs=[out])
 if __name__ == "__main__":
-    # Tip: Set server_name="0.0.0.0" for containers; share=True for quick external testing
-    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)

+# app.py
 import os
+# --- Space-safe flags (place BEFORE importing paddle/paddleocr) ---
+os.environ.setdefault("FLAGS_use_mkldnn", "0")
+os.environ.setdefault("FLAGS_enable_mkldnn", "0")
+os.environ.setdefault("OMP_NUM_THREADS", "1")
+os.environ.setdefault("KMP_BLOCKTIME", "0")
+# Gradio on Spaces uses these
+os.environ.setdefault("GRADIO_SERVER_NAME", "0.0.0.0")
+os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")
 import io
 import sys
 import json
 import gradio as gr
 from paddleocr import PaddleOCR
+# --------- Config knobs ----------
+# Each knob below (except CLS) is read from an environment variable when the
+# module is imported, falling back to the default shown.
+LANG = os.getenv("OCR_LANG", "en")
+USE_GPU = os.getenv("OCR_USE_GPU", "false").lower() == "true"  # Spaces CPU → keep false
+# NOTE(review): DET and REC are not passed to PaddleOCR by _build_ocr (it sets
+# both model dirs to None) — confirm whether they are used anywhere else.
 DET = os.getenv("OCR_DET_MODEL", "ch_PP-OCRv4_det")
 REC = os.getenv("OCR_REC_MODEL", "en_PP-OCRv4")
+# Whether the angle classifier is requested (forwarded as use_angle_cls / cls).
+CLS = True
+# Presumably the minimum confidence for keeping a recognized line (0.0 would
+# keep everything); the comparison itself is not visible in this view — verify.
+CONF_THRESHOLD = float(os.getenv("OCR_CONF_THRESHOLD", "0.0"))
 def _pil_to_cv(img: Image.Image) -> np.ndarray:
+    """Convert a PIL image to a NumPy array in OpenCV's BGR channel order.
+
+    The pixel data is taken with ``np.array(img)`` and run through
+    ``cv2.COLOR_RGB2BGR``, so ``img`` is assumed to be in RGB mode.
+    """
     return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+def _build_ocr(use_cls: bool) -> PaddleOCR:
+    """Construct a new PaddleOCR instance from the module-level settings.
+
+    Args:
+        use_cls: Forwarded as ``use_angle_cls``.
+
+    Returns:
+        A ``PaddleOCR`` object built with ``LANG`` and ``USE_GPU``, with
+        logging disabled via ``show_log=False``.
+    """
+    return PaddleOCR(
+        use_angle_cls=use_cls,
+        lang=LANG,
+        use_gpu=USE_GPU,
+        # Both model directories are left as None, i.e. no custom model path
+        # is supplied here.
+        det_model_dir=None,
+        rec_model_dir=None,
+        show_log=False
+    )
+# Primary OCR instance (CLS on). If CLS crashes, we'll rebuild w/o CLS just-in-time.
+_OCR = _build_ocr(CLS)
 def ocr_image(pil_img: Image.Image) -> List[Tuple[str, float]]:
     img_cv = _pil_to_cv(pil_img)
+    def _run(ocr: PaddleOCR, cls_flag: bool):
+        return ocr.ocr(img_cv, cls=cls_flag)
+    try:
+        result = _run(_OCR, CLS)
+    except RuntimeError as e:
+        msg = str(e).lower()
+        if "primitive" in msg or "mkldnn" in msg or "predictor.run" in msg:
+            # One-time fallback without angle classifier
+            fallback_ocr = _build_ocr(False)
+            result = _run(fallback_ocr, False)
+        else:
+            raise
     lines: List[Tuple[str, float]] = []
     if not result:
         return lines
     for line in result[0]:
         txt = line[1][0]
         conf = float(line[1][1])
     return lines
 def read_image(filepath: str) -> Image.Image:
+    """Open the image at ``filepath`` with PIL and return it converted to RGB.
+
+    The file handle is closed when the ``with`` block exits; the value
+    returned is the result of ``convert("RGB")``.
+    """
     with Image.open(filepath) as im:
         return im.convert("RGB")
+def read_pdf_pages(filepath: str):
+    """Render each page of the PDF at ``filepath`` to an RGB PIL image.
+
+    Uses PyMuPDF (``fitz``) to rasterize the pages.
+
+    Returns:
+        A list with one ``Image.Image`` per page, in the order the document
+        yields its pages.
+    """
+    pages = []
     with fitz.open(filepath) as doc:
         for page in doc:
+            # Scale factor of 2 on both axes, so pages are rendered at twice
+            # the default resolution before OCR.
+            mat = fitz.Matrix(2, 2)
             pix = page.get_pixmap(matrix=mat, alpha=False)
+            # alpha=False above means the samples buffer is read as plain RGB.
             img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
             pages.append(img)
     return pages
 def extract_text_from_file(filepath: str) -> str:
+    """Run OCR on a file chosen by its extension and return plain text.
+
+    PDFs are rendered page by page; each page's text is prefixed with a
+    ``--- Page N ---`` header (N starting at 1) and pages are separated by a
+    blank line. Image files are OCR'd directly and their lines joined with
+    newlines.
+
+    Args:
+        filepath: Path to the uploaded file; the extension check is
+            case-insensitive.
+
+    Returns:
+        The recognized text as a single string.
+
+    Raises:
+        ValueError: If the extension is neither ``.pdf`` nor one of the
+            listed image extensions.
+    """
     lower = filepath.lower()
     if lower.endswith(".pdf"):
+        texts = []
+        for i, img in enumerate(read_pdf_pages(filepath), start=1):
+            lines = ocr_image(img)
+            # Keep only the text of each (text, confidence) pair.
             page_text = "\n".join([t for t, _ in lines])
+            texts.append(f"--- Page {i} ---\n{page_text}".strip())
+        return "\n\n".join([t for t in texts if t])
     elif lower.endswith((".png", ".jpg", ".jpeg", ".tif", ".tiff", ".bmp", ".webp")):
+        lines = ocr_image(read_image(filepath))
         return "\n".join([t for t, _ in lines]).strip()
     else:
+        raise ValueError("Unsupported file type. Upload an image or a PDF.")
 def infer(file_obj) -> str:
     try:
             return "No file uploaded."
         filepath = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
         text = extract_text_from_file(filepath)
+        print("\n===== OCR RAW TEXT =====\n", text, "\n===== END =====\n", flush=True)
         return text or "[No text detected]"
     except Exception as e:
         traceback.print_exc()
         return f"Error during OCR: {e}"
 TITLE = "PaddleOCR Text Extractor (Images & PDFs)"
+DESC = "Upload an image or PDF. Runs PP-OCRv4 on CPU with Space-safe settings."
 with gr.Blocks(title=TITLE) as demo:
     gr.Markdown(f"# {TITLE}\n{DESC}")
         file_in = gr.File(label="Upload Image or PDF", file_count="single", file_types=["image", ".pdf"])
     out = gr.Textbox(label="Extracted Text", lines=25, show_copy_button=True)
     run_btn = gr.Button("Run OCR", variant="primary")
     run_btn.click(fn=infer, inputs=[file_in], outputs=[out])
     file_in.change(fn=infer, inputs=[file_in], outputs=[out])
 if __name__ == "__main__":
+    # Host comes from GRADIO_SERVER_NAME (default "0.0.0.0") and the port from
+    # PORT (default 7860), so the environment can override both.
+    demo.launch(server_name=os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
+                server_port=int(os.getenv("PORT", "7860")),
+                show_error=True)