File size: 4,597 Bytes
8e08792 11c7f99 8e08792 346fc60 11c7f99 346fc60 11c7f99 346fc60 11c7f99 8e08792 346fc60 8e08792 11c7f99 8e08792 346fc60 8e08792 346fc60 11c7f99 8e08792 346fc60 11c7f99 8e08792 11c7f99 346fc60 11c7f99 8e08792 346fc60 8e08792 11c7f99 8e08792 346fc60 11c7f99 8e08792 346fc60 8e08792 346fc60 8e08792 11c7f99 346fc60 11c7f99 8e08792 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
# app.py
import os
# --- Space-safe flags (this must run BEFORE paddle/paddleocr is imported) ---
# Each entry is only a default: a value already present in the environment
# is left untouched.
for _env_name, _env_default in (
    ("FLAGS_use_mkldnn", "0"),
    ("FLAGS_enable_mkldnn", "0"),
    ("OMP_NUM_THREADS", "1"),
    ("KMP_BLOCKTIME", "0"),
    # Gradio on Spaces uses these
    ("GRADIO_SERVER_NAME", "0.0.0.0"),
    ("GRADIO_ANALYTICS_ENABLED", "False"),
):
    if _env_name not in os.environ:
        os.environ[_env_name] = _env_default
# Do not leave the loop variables behind as module attributes.
del _env_name, _env_default
import io
import sys
import json
import traceback
from typing import List, Tuple
import numpy as np
from PIL import Image
import fitz # PyMuPDF
import cv2
import gradio as gr
from paddleocr import PaddleOCR
# --------- Config knobs ----------
# Every knob except CLS can be overridden through an environment variable.

# Language code handed to PaddleOCR.
LANG = os.environ.get("OCR_LANG", "en")

# Spaces CPU → keep false. Only the (case-insensitive) string "true" enables it.
USE_GPU = os.environ.get("OCR_USE_GPU", "false").lower() == "true"

# Detection / recognition model names.
DET = os.environ.get("OCR_DET_MODEL", "ch_PP-OCRv4_det")
REC = os.environ.get("OCR_REC_MODEL", "en_PP-OCRv4")

# Whether the primary OCR engine runs with the angle classifier enabled.
CLS = True

# Recognised lines whose confidence is below this value are discarded.
CONF_THRESHOLD = float(os.environ.get("OCR_CONF_THRESHOLD", "0.0"))
def _pil_to_cv(img: Image.Image) -> np.ndarray:
    """Convert a PIL image into a BGR ``numpy`` array for OpenCV-style consumers.

    Args:
        img: Source image in any PIL mode.

    Returns:
        The pixel data with the channel order swapped from RGB to BGR.
    """
    # Greyscale, palette and 1-bit images turn into 2-D arrays, which the
    # RGB->BGR conversion rejects; normalise everything to 3-channel RGB first.
    if img.mode != "RGB":
        img = img.convert("RGB")
    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
def _build_ocr(use_cls: bool) -> PaddleOCR:
    """Create a PaddleOCR engine configured from the module-level knobs.

    Args:
        use_cls: Passed through as ``use_angle_cls``.

    Returns:
        A new ``PaddleOCR`` instance using ``LANG`` and ``USE_GPU``.
    """
    engine_options = {
        "use_angle_cls": use_cls,
        "lang": LANG,
        "use_gpu": USE_GPU,
        # NOTE(review): None presumably lets PaddleOCR choose its default
        # models for LANG — confirm against the PaddleOCR docs.
        "det_model_dir": None,
        "rec_model_dir": None,
        "show_log": False,
    }
    return PaddleOCR(**engine_options)
# Primary OCR instance, built once at import time with the angle classifier
# setting taken from CLS. If running with CLS crashes, ocr_image() builds an
# engine without it just-in-time.
_OCR = _build_ocr(use_cls=CLS)
def ocr_image(pil_img: Image.Image) -> List[Tuple[str, float]]:
    """Run OCR on a single PIL image.

    The primary engine (``_OCR``) is tried first. If it raises a
    ``RuntimeError`` whose message mentions "primitive", "mkldnn" or
    "predictor.run", the image is re-run on an engine built without the
    angle classifier. That fallback engine is created on first use and then
    reused, so repeated failures do not rebuild it on every call.

    Args:
        pil_img: Image to recognise.

    Returns:
        ``(text, confidence)`` pairs for every recognised line whose
        confidence is at least ``CONF_THRESHOLD``; an empty list when
        nothing was detected.

    Raises:
        RuntimeError: Any runtime error from the primary engine that does
            not match the known messages above.
    """
    img_cv = _pil_to_cv(pil_img)

    try:
        result = _OCR.ocr(img_cv, cls=CLS)
    except RuntimeError as e:
        msg = str(e).lower()
        if not any(marker in msg for marker in ("primitive", "mkldnn", "predictor.run")):
            raise
        # Fallback without the angle classifier; cached on the function
        # object so the engine is only constructed once.
        fallback_ocr = getattr(ocr_image, "_fallback_ocr", None)
        if fallback_ocr is None:
            fallback_ocr = _build_ocr(False)
            ocr_image._fallback_ocr = fallback_ocr
        result = fallback_ocr.ocr(img_cv, cls=False)

    # The per-image entry can itself be None/empty when no text was found;
    # iterating it unguarded would raise TypeError.
    if not result or not result[0]:
        return []

    lines: List[Tuple[str, float]] = []
    for line in result[0]:
        # line[1] holds the recognised text followed by its confidence.
        txt = line[1][0]
        conf = float(line[1][1])
        if conf >= CONF_THRESHOLD:
            lines.append((txt, conf))
    return lines
def read_image(filepath: str) -> Image.Image:
    """Open the image at ``filepath`` and return it converted to RGB.

    The file is opened through a context manager, so it is closed again
    before the converted image is handed back to the caller.
    """
    with Image.open(filepath) as source:
        rgb_image = source.convert("RGB")
    return rgb_image
def read_pdf_pages(filepath: str, zoom: float = 2.0) -> List[Image.Image]:
    """Rasterise every page of a PDF into an RGB PIL image.

    Args:
        filepath: Path of the PDF to open.
        zoom: Scale factor applied to both axes when rendering each page.
            The default of 2.0 matches the previously hard-coded value.

    Returns:
        One image per page, in document order.

    Raises:
        ValueError: If ``zoom`` is not positive.
    """
    if zoom <= 0:
        raise ValueError("zoom must be positive")

    # The scaling matrix is the same for every page, so build it once.
    mat = fitz.Matrix(zoom, zoom)
    pages: List[Image.Image] = []
    with fitz.open(filepath) as doc:
        for page in doc:
            # alpha=False keeps the pixmap free of an alpha channel so the
            # raw samples can be read as RGB below.
            pix = page.get_pixmap(matrix=mat, alpha=False)
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            pages.append(img)
    return pages
def extract_text_from_file(filepath: str) -> str:
    """OCR a PDF or image file and return the recognised text.

    The file type is chosen from the (case-insensitive) file extension.
    PDFs yield one section per page, each introduced by a
    ``--- Page N ---`` header and separated by a blank line; images yield
    the recognised lines joined by newlines.

    Args:
        filepath: Path of the file to process.

    Returns:
        The extracted text.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor a supported
            image extension.
    """
    image_suffixes = (".png", ".jpg", ".jpeg", ".tif", ".tiff", ".bmp", ".webp")
    name = filepath.lower()

    if name.endswith(".pdf"):
        sections = []
        for page_no, page_img in enumerate(read_pdf_pages(filepath), start=1):
            body = "\n".join(text for text, _ in ocr_image(page_img))
            section = f"--- Page {page_no} ---\n{body}".strip()
            if section:
                sections.append(section)
        return "\n\n".join(sections)

    if name.endswith(image_suffixes):
        recognised = ocr_image(read_image(filepath))
        return "\n".join(text for text, _ in recognised).strip()

    raise ValueError("Unsupported file type. Upload an image or a PDF.")
def infer(file_obj) -> str:
    """Gradio callback: OCR the uploaded file and return text for display.

    Args:
        file_obj: The upload, or ``None`` when nothing is selected. Its
            ``name`` attribute is used as the path when present; otherwise
            ``str(file_obj)`` is used.

    Returns:
        The extracted text, a placeholder when no text was found, or an
        error message. Exceptions are never propagated to the caller; the
        traceback is printed instead.
    """
    if file_obj is None:
        return "No file uploaded."

    try:
        if hasattr(file_obj, "name"):
            path = file_obj.name
        else:
            path = str(file_obj)
        extracted = extract_text_from_file(path)
        # Echo the raw result to stdout so it shows up in the process logs.
        print("\n===== OCR RAW TEXT =====\n", extracted, "\n===== END =====\n", flush=True)
    except Exception as err:
        traceback.print_exc()
        return f"Error during OCR: {err}"

    return extracted or "[No text detected]"
TITLE = "PaddleOCR Text Extractor (Images & PDFs)"
DESC = "Upload an image or PDF. Runs PP-OCRv4 on CPU with Space-safe settings."

# NOTE(review): the component nesting below is reconstructed from a copy of
# the file that had lost its indentation — confirm which widgets belong
# inside the Row.
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"# {TITLE}\n{DESC}")

    with gr.Row():
        file_in = gr.File(
            label="Upload Image or PDF",
            file_count="single",
            file_types=["image", ".pdf"],
        )

    out = gr.Textbox(
        label="Extracted Text",
        lines=25,
        show_copy_button=True,
    )
    run_btn = gr.Button("Run OCR", variant="primary")

    # OCR runs both on an explicit button press and whenever the selected
    # file changes.
    run_btn.click(
        fn=infer,
        inputs=[file_in],
        outputs=[out],
    )
    file_in.change(
        fn=infer,
        inputs=[file_in],
        outputs=[out],
    )
if __name__ == "__main__":
    # Host and port come from the environment, with the same fallbacks used
    # when the variables are unset.
    launch_options = {
        "server_name": os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
        "server_port": int(os.getenv("PORT", "7860")),
        "show_error": True,
    }
    demo.launch(**launch_options)
|