Spaces:

iammraat
/

HTR_paddle

Sleeping

App Files Community

iammraat committed on Feb 2

Commit

e2f83a2

verified ·

1 Parent(s): 333f06a

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -124

app.py CHANGED Viewed

@@ -1,116 +1,16 @@
-# import gradio as gr
-# import cv2
-# import numpy as np
-# from paddleocr import PaddleOCR
-# from PIL import Image
-# # Initialize PaddleOCR
-# # enable_mkldnn=False is CRITICAL to prevent the C++ crash
-# ocr = PaddleOCR(
-#     use_textline_orientation=True,
-#     lang='en',
-#     ocr_version='PP-OCRv5',
-#     enable_mkldnn=False
-# )
-# def run_ocr(input_image):
-#     if input_image is None:
-#         return None, "No image uploaded"
-#     image_np = np.array(input_image)
-#     # Run OCR
-#     result = ocr.ocr(image_np)
-#     if result is None or not result:
-#         return input_image, "No text detected."
-#     # Unwrap the list if needed (Paddle often returns [result])
-#     if isinstance(result, list) and len(result) == 1:
-#         result = result[0]
-#     detected_texts = []
-#     viz_image = image_np.copy()
-#     # --- PARSING LOGIC FIX ---
-#     # CASE 1: New V5 / PaddleX Format (The structure in your logs)
-#     # It returns a dict with keys: 'rec_texts', 'rec_polys', 'rec_scores'
-#     if isinstance(result, dict) and 'rec_texts' in result:
-#         texts = result.get('rec_texts', [])
-#         boxes = result.get('rec_polys', [])
-#         scores = result.get('rec_scores', [])
-#         # We zip them together to iterate
-#         for box, text, score in zip(boxes, texts, scores):
-#             detected_texts.append(f"{text} (Conf: {score:.2f})")
-#             try:
-#                 # v5 polys are often already numpy arrays, but we ensure shape
-#                 box = np.array(box).astype(np.int32).reshape((-1, 1, 2))
-#                 cv2.polylines(viz_image, [box], isClosed=True, color=(0, 255, 255), thickness=2)
-#             except Exception:
-#                 pass # Skip drawing if box format is weird, but keep text
-#     # CASE 2: Legacy Format (List of lists)
-#     # [ [box, (text, score)], ... ]
-#     elif isinstance(result, list):
-#         for line in result:
-#             if not isinstance(line, list) or len(line) < 2:
-#                 continue
-#             box = line[0]
-#             text_content = line[1][0]
-#             score = line[1][1]
-#             detected_texts.append(f"{text_content} (Conf: {score:.2f})")
-#             try:
-#                 box = np.array(box).astype(np.int32).reshape((-1, 1, 2))
-#                 cv2.polylines(viz_image, [box], isClosed=True, color=(0, 255, 255), thickness=2)
-#             except Exception:
-#                 pass
-#     return viz_image, "\n".join(detected_texts)
-# with gr.Blocks(title="PaddleOCR v5 Handwriting Demo") as demo:
-#     gr.Markdown("## ⚡ PaddleOCR v5 (Handwriting Edition)")
-#     with gr.Row():
-#         with gr.Column():
-#             input_img = gr.Image(type="pil", label="Input Document")
-#             submit_btn = gr.Button("Read Handwriting", variant="primary")
-#         with gr.Column():
-#             output_img = gr.Image(label="Detections")
-#             output_text = gr.Textbox(label="Recognized Text", lines=15)
-#     submit_btn.click(fn=run_ocr, inputs=input_img, outputs=[output_img, output_text])
-# if __name__ == "__main__":
-#     demo.launch(server_name="0.0.0.0", server_port=7860)
 import gradio as gr
 import cv2
 import numpy as np
 from paddleocr import PaddleOCR
 from PIL import Image
-# Initialize PaddleOCR with Mobile Weights (PP-OCRv4)
-# ocr_version='PP-OCRv4' defaults to the lightweight "Mobile" model.
-# This is the single biggest factor for CPU speed.
 ocr = PaddleOCR(
     use_textline_orientation=True,
     lang='en',
-    ocr_version='PP-OCRv4',
-    enable_mkldnn=False # Keep False to avoid the C++ crash you saw earlier
 )
 def run_ocr(input_image):
@@ -119,51 +19,46 @@ def run_ocr(input_image):
     image_np = np.array(input_image)
-    # --- OPTIMIZATION: Resize large images ---
-    # OCR on 4k images is slow on CPU. Resizing to ~1280px width usually
-    # keeps text readable but speeds up inference by 2x-4x.
-    height, width = image_np.shape[:2]
-    MAX_WIDTH = 1280
-    if width > MAX_WIDTH:
-        scale = MAX_WIDTH / width
-        new_height = int(height * scale)
-        image_np = cv2.resize(image_np, (MAX_WIDTH, new_height))
     # Run OCR
     result = ocr.ocr(image_np)
     if result is None or not result:
         return input_image, "No text detected."
-    # Unwrap list if nested
     if isinstance(result, list) and len(result) == 1:
         result = result[0]
     detected_texts = []
     viz_image = image_np.copy()
-    # --- ROBUST PARSING (Handles both v4 and v5 formats) ---
-    # Check if result is the new Dictionary format (v5 style)
     if isinstance(result, dict) and 'rec_texts' in result:
         texts = result.get('rec_texts', [])
         boxes = result.get('rec_polys', [])
         scores = result.get('rec_scores', [])
         for box, text, score in zip(boxes, texts, scores):
             detected_texts.append(f"{text} (Conf: {score:.2f})")
             try:
                 box = np.array(box).astype(np.int32).reshape((-1, 1, 2))
                 cv2.polylines(viz_image, [box], isClosed=True, color=(0, 255, 255), thickness=2)
-            except: pass
-    # Check if result is the Standard List format (v3/v4 style)
     elif isinstance(result, list):
         for line in result:
             if not isinstance(line, list) or len(line) < 2:
                 continue
-            # v4 format: [ [box_coords], (text, confidence) ]
             box = line[0]
             text_content = line[1][0]
             score = line[1][1]
@@ -173,12 +68,13 @@ def run_ocr(input_image):
             try:
                 box = np.array(box).astype(np.int32).reshape((-1, 1, 2))
                 cv2.polylines(viz_image, [box], isClosed=True, color=(0, 255, 255), thickness=2)
-            except: pass
     return viz_image, "\n".join(detected_texts)
-with gr.Blocks(title="Fast Handwriting OCR") as demo:
-    gr.Markdown("## ⚡ Fast CPU Handwriting OCR")
     with gr.Row():
         with gr.Column():
@@ -192,4 +88,7 @@ with gr.Blocks(title="Fast Handwriting OCR") as demo:
     submit_btn.click(fn=run_ocr, inputs=input_img, outputs=[output_img, output_text])
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
 import cv2
 import numpy as np
 from paddleocr import PaddleOCR
 from PIL import Image
+# Initialize PaddleOCR
+# enable_mkldnn=False is CRITICAL to prevent the C++ crash
 ocr = PaddleOCR(
     use_textline_orientation=True,
     lang='en',
+    ocr_version='PP-OCRv5',
+    enable_mkldnn=False
 )
 def run_ocr(input_image):
     image_np = np.array(input_image)
     # Run OCR
     result = ocr.ocr(image_np)
     if result is None or not result:
         return input_image, "No text detected."
+    # Unwrap the list if needed (Paddle often returns [result])
     if isinstance(result, list) and len(result) == 1:
         result = result[0]
     detected_texts = []
     viz_image = image_np.copy()
+    # --- PARSING LOGIC FIX ---
+    # CASE 1: New V5 / PaddleX Format (The structure in your logs)
+    # It returns a dict with keys: 'rec_texts', 'rec_polys', 'rec_scores'
     if isinstance(result, dict) and 'rec_texts' in result:
         texts = result.get('rec_texts', [])
         boxes = result.get('rec_polys', [])
         scores = result.get('rec_scores', [])
+        # We zip them together to iterate
         for box, text, score in zip(boxes, texts, scores):
             detected_texts.append(f"{text} (Conf: {score:.2f})")
             try:
+                # v5 polys are often already numpy arrays, but we ensure shape
                 box = np.array(box).astype(np.int32).reshape((-1, 1, 2))
                 cv2.polylines(viz_image, [box], isClosed=True, color=(0, 255, 255), thickness=2)
+            except Exception:
+                pass # Skip drawing if box format is weird, but keep text
+    # CASE 2: Legacy Format (List of lists)
+    # [ [box, (text, score)], ... ]
     elif isinstance(result, list):
         for line in result:
             if not isinstance(line, list) or len(line) < 2:
                 continue
             box = line[0]
             text_content = line[1][0]
             score = line[1][1]
             try:
                 box = np.array(box).astype(np.int32).reshape((-1, 1, 2))
                 cv2.polylines(viz_image, [box], isClosed=True, color=(0, 255, 255), thickness=2)
+            except Exception:
+                pass
     return viz_image, "\n".join(detected_texts)
+with gr.Blocks(title="PaddleOCR v5 Handwriting Demo") as demo:
+    gr.Markdown("## ⚡ PaddleOCR v5 (Handwriting Edition)")
     with gr.Row():
         with gr.Column():
     submit_btn.click(fn=run_ocr, inputs=input_img, outputs=[output_img, output_text])
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)