Spaces:
Sleeping
Sleeping
| # File: ocr_processor.py | |
| import numpy as np | |
| from paddleocr import PaddleOCR | |
| from PIL import Image | |
| import io | |
# Shared OCR engine, constructed once at import time so every call to
# extract_text_from_image() reuses the same instance. Configured for English
# with the text-angle classifier switched on.
ocr = PaddleOCR(
    lang="en",
    use_angle_cls=True,
)
def extract_text_from_image(image_bytes: bytes) -> str:
    """Run OCR on an encoded image and return the recognized text.

    The bytes are decoded with Pillow, converted to RGB, turned into a NumPy
    array and passed to the module-level ``ocr`` engine. Only the first entry
    of the OCR result (the single input image) is read.

    Args:
        image_bytes: Raw contents of an image file that Pillow can open.

    Returns:
        The recognized text fragments joined by single spaces. If nothing was
        recognized, the literal string ``"No text detected in the image."``.
        If any step raises, a string beginning with
        ``"An error occurred during OCR: "`` followed by the exception text;
        failures are reported through the return value and never propagate
        to the caller.
    """
    try:
        # Decode and normalize to 3-channel RGB. The context manager closes
        # the image handle once the pixel data has been copied into the array.
        with Image.open(io.BytesIO(image_bytes)) as img:
            img_array = np.array(img.convert("RGB"))

        result = ocr.ocr(img_array)

        # One image in, so only the first result entry is relevant. It may be
        # missing or empty when no text was found.
        first_page = result[0] if result else None
        if not first_page:
            return "No text detected in the image."

        if isinstance(first_page, dict):
            # NOTE(review): newer PaddleOCR releases (3.x) appear to return a
            # dict-like result per image with the recognized strings under
            # "rec_texts" -- confirm against the installed version. Iterating
            # such a dict with the legacy indexing below would yield
            # characters of its key names rather than recognized text.
            text_lines = list(first_page.get("rec_texts") or [])
        else:
            # Legacy shape: each entry is [box, (text, score)].
            text_lines = [line[1][0] for line in first_page]

        if not text_lines:
            return "No text detected in the image."
        return " ".join(text_lines)
    except Exception as e:
        # Deliberately broad: callers rely on always receiving a string.
        return f"An error occurred during OCR: {e}"