KarthiEz committed on
Commit
8e08792
·
verified ·
1 Parent(s): 402ad7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -59
app.py CHANGED
@@ -1,4 +1,16 @@
 
1
  import os
 
 
 
 
 
 
 
 
 
 
 
2
  import io
3
  import sys
4
  import json
@@ -12,39 +24,50 @@ import cv2
12
  import gradio as gr
13
  from paddleocr import PaddleOCR
14
 
15
- # --------- Config knobs (safe defaults) ----------
16
- LANG = os.getenv("OCR_LANG", "en") # e.g., "en", "ar", "en_number", "en_PP-OCRv3"
17
- USE_GPU = os.getenv("OCR_USE_GPU", "false").lower() == "true"
18
  DET = os.getenv("OCR_DET_MODEL", "ch_PP-OCRv4_det")
19
  REC = os.getenv("OCR_REC_MODEL", "en_PP-OCRv4")
20
- CLS = True # angle classification
21
- CONF_THRESHOLD = float(os.getenv("OCR_CONF_THRESHOLD", "0.0")) # 0.0 → keep everything
22
-
23
- # Initialize once (download models once, reuse across requests)
24
- # Tip: If you want Arabic/English mixed, set LANG="ar" or "en" variants per PaddleOCR docs
25
- OCR = PaddleOCR(
26
- use_angle_cls=CLS,
27
- lang=LANG,
28
- use_gpu=USE_GPU,
29
- det_model_dir=None, # use default
30
- rec_model_dir=None, # use default
31
- show_log=False
32
- )
33
 
34
  def _pil_to_cv(img: Image.Image) -> np.ndarray:
35
- """PIL RGB -> OpenCV BGR ndarray"""
36
  return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  def ocr_image(pil_img: Image.Image) -> List[Tuple[str, float]]:
39
- """
40
- Run OCR on a PIL image and return list of (text, confidence).
41
- """
42
  img_cv = _pil_to_cv(pil_img)
43
- result = OCR.ocr(img_cv, cls=CLS)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  lines: List[Tuple[str, float]] = []
45
  if not result:
46
  return lines
47
- # PaddleOCR returns a list per image; each item has [ [box, (text, conf)], ... ]
48
  for line in result[0]:
49
  txt = line[1][0]
50
  conf = float(line[1][1])
@@ -53,46 +76,33 @@ def ocr_image(pil_img: Image.Image) -> List[Tuple[str, float]]:
53
  return lines
54
 
55
  def read_image(filepath: str) -> Image.Image:
56
- """
57
- Open an image robustly via PIL (also handles TIFF, JPG, PNG).
58
- """
59
  with Image.open(filepath) as im:
60
  return im.convert("RGB")
61
 
62
- def read_pdf_pages(filepath: str) -> List[Image.Image]:
63
- """
64
- Render each PDF page to a PIL image (RGB) using PyMuPDF.
65
- """
66
- pages: List[Image.Image] = []
67
  with fitz.open(filepath) as doc:
68
  for page in doc:
69
- # Render with a scale factor for better OCR accuracy
70
- mat = fitz.Matrix(2, 2) # 2x upscaling
71
  pix = page.get_pixmap(matrix=mat, alpha=False)
72
  img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
73
  pages.append(img)
74
  return pages
75
 
76
  def extract_text_from_file(filepath: str) -> str:
77
- """
78
- Dispatch by file type; return plain text.
79
- """
80
  lower = filepath.lower()
81
  if lower.endswith(".pdf"):
82
- pages = read_pdf_pages(filepath)
83
- all_text: List[str] = []
84
- for i, pil_img in enumerate(pages, start=1):
85
- lines = ocr_image(pil_img)
86
  page_text = "\n".join([t for t, _ in lines])
87
- # Add a page header for clarity on multi-page docs
88
- all_text.append(f"--- Page {i} ---\n{page_text}".strip())
89
- return "\n\n".join([s for s in all_text if s])
90
  elif lower.endswith((".png", ".jpg", ".jpeg", ".tif", ".tiff", ".bmp", ".webp")):
91
- img = read_image(filepath)
92
- lines = ocr_image(img)
93
  return "\n".join([t for t, _ in lines]).strip()
94
  else:
95
- raise ValueError("Unsupported file type. Please upload an image (PNG/JPG/TIFF/WEBP/BMP) or a PDF.")
96
 
97
  def infer(file_obj) -> str:
98
  try:
@@ -100,21 +110,14 @@ def infer(file_obj) -> str:
100
  return "No file uploaded."
101
  filepath = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
102
  text = extract_text_from_file(filepath)
103
- # 🔊 Console telemetry: dump raw text to terminal
104
- print("\n================ OCR RAW TEXT ================\n")
105
- print(text)
106
- print("\n==================== END =====================\n", flush=True)
107
  return text or "[No text detected]"
108
  except Exception as e:
109
  traceback.print_exc()
110
  return f"Error during OCR: {e}"
111
 
112
- # ------------- Gradio UI ----------------
113
  TITLE = "PaddleOCR Text Extractor (Images & PDFs)"
114
- DESC = (
115
- "Upload an image or PDF. The app runs PaddleOCR (PP-OCRv4 pipeline) and returns plain text. "
116
- "Set `OCR_LANG`, `OCR_USE_GPU`, and `OCR_CONF_THRESHOLD` as env vars to tune."
117
- )
118
 
119
  with gr.Blocks(title=TITLE) as demo:
120
  gr.Markdown(f"# {TITLE}\n{DESC}")
@@ -122,12 +125,10 @@ with gr.Blocks(title=TITLE) as demo:
122
  file_in = gr.File(label="Upload Image or PDF", file_count="single", file_types=["image", ".pdf"])
123
  out = gr.Textbox(label="Extracted Text", lines=25, show_copy_button=True)
124
  run_btn = gr.Button("Run OCR", variant="primary")
125
-
126
  run_btn.click(fn=infer, inputs=[file_in], outputs=[out])
127
- # Also trigger on file change for convenience
128
  file_in.change(fn=infer, inputs=[file_in], outputs=[out])
129
 
130
  if __name__ == "__main__":
131
- # Tip: Set server_name="0.0.0.0" for containers; share=True for quick external testing
132
- demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
133
-
 
1
+ # app.py
2
  import os
3
+
4
+ # --- Space-safe flags (place BEFORE importing paddle/paddleocr) ---
5
+ os.environ.setdefault("FLAGS_use_mkldnn", "0")
6
+ os.environ.setdefault("FLAGS_enable_mkldnn", "0")
7
+ os.environ.setdefault("OMP_NUM_THREADS", "1")
8
+ os.environ.setdefault("KMP_BLOCKTIME", "0")
9
+
10
+ # Gradio on Spaces uses these
11
+ os.environ.setdefault("GRADIO_SERVER_NAME", "0.0.0.0")
12
+ os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")
13
+
14
  import io
15
  import sys
16
  import json
 
24
  import gradio as gr
25
  from paddleocr import PaddleOCR
26
 
27
+ # --------- Config knobs ----------
28
+ LANG = os.getenv("OCR_LANG", "en")
29
+ USE_GPU = os.getenv("OCR_USE_GPU", "false").lower() == "true" # Spaces CPU → keep false
30
  DET = os.getenv("OCR_DET_MODEL", "ch_PP-OCRv4_det")
31
  REC = os.getenv("OCR_REC_MODEL", "en_PP-OCRv4")
32
+ CLS = True
33
+ CONF_THRESHOLD = float(os.getenv("OCR_CONF_THRESHOLD", "0.0"))
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  def _pil_to_cv(img: Image.Image) -> np.ndarray:
 
36
  return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
37
 
38
def _build_ocr(use_cls: bool) -> PaddleOCR:
    """Create a PaddleOCR engine configured from the module-level knobs.

    Args:
        use_cls: Whether to enable the text-angle classifier.

    Returns:
        A new PaddleOCR instance using ``LANG`` and ``USE_GPU``, the default
        detection/recognition models, and library logging turned off.
    """
    options = {
        "use_angle_cls": use_cls,
        "lang": LANG,
        "use_gpu": USE_GPU,
        # None lets PaddleOCR pick its default model directories.
        "det_model_dir": None,
        "rec_model_dir": None,
        "show_log": False,
    }
    return PaddleOCR(**options)
47
+
48
+ # Primary OCR instance (CLS on). If CLS crashes, we'll rebuild w/o CLS just-in-time.
49
+ _OCR = _build_ocr(CLS)
50
+
51
  def ocr_image(pil_img: Image.Image) -> List[Tuple[str, float]]:
 
 
 
52
  img_cv = _pil_to_cv(pil_img)
53
+
54
+ def _run(ocr: PaddleOCR, cls_flag: bool):
55
+ return ocr.ocr(img_cv, cls=cls_flag)
56
+
57
+ try:
58
+ result = _run(_OCR, CLS)
59
+ except RuntimeError as e:
60
+ msg = str(e).lower()
61
+ if "primitive" in msg or "mkldnn" in msg or "predictor.run" in msg:
62
+ # One-time fallback without angle classifier
63
+ fallback_ocr = _build_ocr(False)
64
+ result = _run(fallback_ocr, False)
65
+ else:
66
+ raise
67
+
68
  lines: List[Tuple[str, float]] = []
69
  if not result:
70
  return lines
 
71
  for line in result[0]:
72
  txt = line[1][0]
73
  conf = float(line[1][1])
 
76
  return lines
77
 
78
  def read_image(filepath: str) -> Image.Image:
 
 
 
79
  with Image.open(filepath) as im:
80
  return im.convert("RGB")
81
 
82
def read_pdf_pages(filepath: str) -> List[Image.Image]:
    """Render each page of a PDF to an RGB PIL image using PyMuPDF.

    Args:
        filepath: Path of the PDF document to open.

    Returns:
        One RGB image per page, in document order. Empty if the document
        has no pages.
    """
    # The 2x scaling matrix is the same for every page, so build it once
    # instead of once per loop iteration.
    zoom = fitz.Matrix(2, 2)
    pages: List[Image.Image] = []
    with fitz.open(filepath) as doc:
        for page in doc:
            # alpha=False yields 3-channel samples matching the "RGB" mode below.
            pix = page.get_pixmap(matrix=zoom, alpha=False)
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            pages.append(img)
    return pages
91
 
92
  def extract_text_from_file(filepath: str) -> str:
 
 
 
93
  lower = filepath.lower()
94
  if lower.endswith(".pdf"):
95
+ texts = []
96
+ for i, img in enumerate(read_pdf_pages(filepath), start=1):
97
+ lines = ocr_image(img)
 
98
  page_text = "\n".join([t for t, _ in lines])
99
+ texts.append(f"--- Page {i} ---\n{page_text}".strip())
100
+ return "\n\n".join([t for t in texts if t])
 
101
  elif lower.endswith((".png", ".jpg", ".jpeg", ".tif", ".tiff", ".bmp", ".webp")):
102
+ lines = ocr_image(read_image(filepath))
 
103
  return "\n".join([t for t, _ in lines]).strip()
104
  else:
105
+ raise ValueError("Unsupported file type. Upload an image or a PDF.")
106
 
107
  def infer(file_obj) -> str:
108
  try:
 
110
  return "No file uploaded."
111
  filepath = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
112
  text = extract_text_from_file(filepath)
113
+ print("\n===== OCR RAW TEXT =====\n", text, "\n===== END =====\n", flush=True)
 
 
 
114
  return text or "[No text detected]"
115
  except Exception as e:
116
  traceback.print_exc()
117
  return f"Error during OCR: {e}"
118
 
 
119
  TITLE = "PaddleOCR Text Extractor (Images & PDFs)"
120
+ DESC = "Upload an image or PDF. Runs PP-OCRv4 on CPU with Space-safe settings."
 
 
 
121
 
122
  with gr.Blocks(title=TITLE) as demo:
123
  gr.Markdown(f"# {TITLE}\n{DESC}")
 
125
  file_in = gr.File(label="Upload Image or PDF", file_count="single", file_types=["image", ".pdf"])
126
  out = gr.Textbox(label="Extracted Text", lines=25, show_copy_button=True)
127
  run_btn = gr.Button("Run OCR", variant="primary")
 
128
  run_btn.click(fn=infer, inputs=[file_in], outputs=[out])
 
129
  file_in.change(fn=infer, inputs=[file_in], outputs=[out])
130
 
131
  if __name__ == "__main__":
132
+ demo.launch(server_name=os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
133
+ server_port=int(os.getenv("PORT", "7860")),
134
+ show_error=True)