Spaces:
Sleeping
Sleeping
| from paddleocr import PaddleOCR, draw_ocr | |
| import gradio as gr | |
| from PIL import Image | |
| import numpy as np | |
| import cv2 | |
| def draw_boxes_only(image, boxes, txts=None): | |
| img = image.copy() | |
| for idx, box in enumerate(boxes): | |
| # Convert | |
| box = np.array(box).astype(np.int32).reshape((-1, 1, 2)) | |
| # Draw | |
| cv2.polylines(img, [box], isClosed=True, color=(0, 255, 0), thickness=2) | |
| # Draw text near the box | |
| if txts is not None and idx < len(txts): | |
| text = txts[idx] | |
| position = (int(box[0][0][0]), int(box[0][0][1]) - 10) | |
| cv2.putText(img, text, position, cv2.FONT_HERSHEY_SIMPLEX, | |
| 0.5, (0, 0, 255), 1) | |
| return img | |
# Shared PaddleOCR engine, created lazily by _get_ocr_engine().
_OCR_ENGINE = None


def _get_ocr_engine():
    """Return the shared PaddleOCR engine, constructing it on first use."""
    global _OCR_ENGINE
    if _OCR_ENGINE is None:
        # Constructing PaddleOCR is the expensive step, so do it once instead
        # of on every request.
        # NOTE(review): the engine is now shared between requests; confirm the
        # app's concurrency settings if the predictor is not thread-safe.
        _OCR_ENGINE = PaddleOCR(
            use_angle_cls=True,
            lang='en',
            cls_model_dir='angle_cls',
            cls_thresh=0.9,
            cls_batch_num=1,
        )
    return _OCR_ENGINE


def ocr(image):
    """Run OCR on an uploaded image and return an annotated image plus text.

    Args:
        image: The uploaded image (the interface supplies a PIL image), or
            ``None`` when nothing was uploaded.

    Returns:
        A ``(image, text)`` tuple:

        * text found: a PIL image with boxes and labels drawn on it, and the
          recognised strings joined with newlines;
        * no text found: the unmodified image array and ``"No text detected"``;
        * no image supplied: ``None`` and ``"No image provided"``.
    """
    if image is None:
        return None, "No image provided"

    # Convert PIL Image to numpy array
    img_np = np.array(image)
    result = _get_ocr_engine().ocr(img_np, cls=True)

    # The per-image entry can be None (not just an empty list) when nothing
    # was detected, so test truthiness rather than calling len() on it.
    lines = result[0] if result else None
    if not lines:
        return img_np, "No text detected"

    # Each entry is indexed as (box, (text, score)); the score is not used.
    boxes = [line[0] for line in lines]
    txts = [line[1][0] for line in lines]

    annotated = draw_boxes_only(img_np, boxes, txts)
    # Convert back to PIL Image
    return Image.fromarray(annotated), "\n".join(txts)
# Gradio UI: one uploaded image in; annotated image and extracted text out.
receipt_input = gr.Image(type="pil", label="Upload Receipt Image")
result_outputs = [
    gr.Image(type="pil", label="OCR Result", width=600),
    gr.Textbox(label="Extracted Text"),
]

demo = gr.Interface(
    fn=ocr,
    inputs=receipt_input,
    outputs=result_outputs,
    title="Receipt OCR using PaddleOCR",
)
# Start the web server for the interface.
demo.launch()